資源簡介
文件內容比較
代碼片段和文件信息
using?PanGu;
using?PanGu.Match;
using?SimilarCompareCore.Model;
using?System;
using?System.Collections.Generic;
using?System.Linq;
using?System.Text;
using?System.Text.Regularexpressions;
using?System.Threading.Tasks;
namespace?SimilarCompareCore
{
????public?class?CompareCore
????{
????????///?
????????///?默認(rèn)最小匹配數(shù)
????????///?
????????private?int?DefaultMinSameCount?=?4;
????????///?
????????///?默認(rèn)最小相似度
????????///?
????????private?double?DefaultMinMatchPercent?=?0.6;
????????///?
????????///?默認(rèn)行長度比:對(duì)比的行長度/目標(biāo)行長度
????????///?
????????private?double?DefaultMinLineLengthPercent?=?0.6;
????????///?
????????///?設(shè)置最小匹配字?jǐn)?shù)
????????///?
????????///?
????????public?void?SetDefaultMinSameCount(int?value)
????????{
????????????if?(value?1)?value?=?1;
????????????DefaultMinSameCount?=?value;
????????}
????????///?
????????///?設(shè)置最小匹配相似度
????????///?
????????///?
????????public?void?SetDefaultMinMatchPercent(double?value)
????????{
????????????if?(value?<=0)?value?=?0.1;
????????????DefaultMinMatchPercent?=?value;
????????}
????????///?
????????///?設(shè)置默認(rèn)行長度比
????????///?
????????///?
????????public?void?SetDefaultMinLineLengthPercent(double?value)
????????{
????????????if?(value?<=?0)?value?=?0.5;
????????????DefaultMinLineLengthPercent?=?value;
????????}
????????///?
????????///?通過分詞將內(nèi)容拆分
????????///?
????????///?
????????///?
????????public?virtual?IEnumerable?GetWordList(string?content)
????????{
????????????//TODO?將分詞結(jié)果緩存起來
????????????var?matchOptions?=?new?MatchOptions();
????????????matchOptions.FrequencyFirst?=?true;
????????????Segment?segment?=?new?Segment();
????????????var?words?=?segment.DoSegment(content?matchOptions).Select(word?=>?word.Word);
????????????return?words;
????????}
????????///?
????????///?拆分內(nèi)容成行
????????///?
????????///?
????????///?
????????///?
????????public??List?GetLines(string?content?string?pattern)
????????{
????????????/*
?????????????*?根據(jù)正則來拆分,同時(shí),根據(jù)正則匹配的結(jié)果,把拆分的分隔符記錄下來
?????????????*?因?yàn)槊看尾鸱侄紩?huì)是根據(jù)匹配的符號(hào)拆分,所以必定是拆分的每一段內(nèi)容都有相應(yīng)的分隔符
?????????????*?將內(nèi)容和分割符單獨(dú)保存下來,方便后面重新將一句句的重新拼接起來
?????????????*?
?????????????*/
????????????var?regex?=?new?Regex(pattern);
????????????if?(!regex.IsMatch(content))?return?null;
????????????var?matches?=?regex.Matches(content);
????????????var?lines?=?regex.Split(content);
????????????var?LineList?=?new?List();
????????????for?(int?i?=?0;?i?????????????{
????????????????if?(string.IsNullOrEmpty(lines[i]))?continue;
????????????????var?lineModel?=?new?LineModel?{?Content?=?lines[i]?RedTagIDList?=?new?List()?SameWordCountList?=?new?List()?SimilarPercentList?=?new?List
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
????.......??????4979??2019-02-19?15:40??SimilarCompareCoreTest\Data\content.txt
????.......??????5571??2019-02-19?15:40??SimilarCompareCoreTest\Data\content2.txt
????.......??????1135??2019-02-19?15:40??SimilarCompareCoreTest\Data\htmltemplate.html
????.......????266012??2019-02-19?15:40??SimilarCompareCoreTest\Data\jquery.js
?????文件????????208??2021-01-29?15:03??SimilarCompareCoreTest\obj\Debug\.NETfr
????.......??????7163??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\DesignTimeResolveAssemblyReferencesInput.cache
????.......??????2333??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csproj.FileListAbsolute.txt
?????文件??????26172??2021-01-29?15:03??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csprojAssemblyReference.cache
????.......?????38181??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.csprojResolveAssemblyReference.cache
????.......??????7680??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.exe
????.......?????15872??2019-02-19?15:40??SimilarCompareCoreTest\obj\Debug\SimilarCompareCoreTest.pdb
????.......??????3034??2019-02-19?15:40??SimilarCompareCoreTest\Program.cs
????.......??????1332??2019-02-19?15:40??SimilarCompareCoreTest\Properties\AssemblyInfo.cs
????.......??????3700??2019-02-19?15:40??SimilarCompareCoreTest\SimilarCompareCoreTest.csproj
?????文件???????1515??2019-02-19?15:40??filediff.sln
?????文件??????54784??2019-02-19?15:40??filediffCore.v12.suo
????..A..H.?????33280??2021-01-29?15:03??.vs\filediff\v16\.suo
????.......?????60416??2019-02-19?15:40??.vs\SimilarCompareCore\v15\.suo
????.......?????????0??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\db.lock
????.......??????4096??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide
????.......?????32768??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide-shm
????.......???2954072??2019-02-19?15:40??.vs\SimilarCompareCore\v15\Server\sqlite3\storage.ide-wal
????..A..H.?????75264??2021-01-29?15:03??.vs\SimilarCompareCore\v16\.suo
????.......?????10411??2019-02-19?15:40??SimilarCompareCore\CompareCore.cs
????.......????454656??2019-02-19?15:40??SimilarCompareCore\Libs\Lucene.Net.dll
????.......???3801600??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.dll
????.......?????12288??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.HighLight.dll
????.......??????7168??2019-02-19?15:40??SimilarCompareCore\Libs\PanGu.Lucene.Analyzer.dll
????.......???????275??2019-02-19?15:40??SimilarCompareCore\Model\ba
????.......???????292??2019-02-19?15:40??SimilarCompareCore\Model\CompareArticalResultModel.cs
............此處省略41個文件信息
評論
共有 條評論