ToolGood.Words

一款高性能敏感词(非法词/脏字)检测过滤组件,附带繁体简体互换,支持全角半角互换,汉字转拼音,模糊搜索等功能。

APACHE-2.0 License

Stars
4.7K
Committers
8

ToolGood.Words

()

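C#: filtering text with `StringSearchEx2.Replace` against a 48k-entry sensitive-word list runs at over 300 million characters per second (CPU: i7 8750H).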

csharp

ToolGood.Pinyin.Build:          
ToolGood.Pinyin.Pretreatment:   
ToolGood.Transformation.Build  https://github.com/BYVoid/OpenCC
ToolGood.Words.Contrast:        
ToolGood.Words.Test:            
ToolGood.Words:                 

()

Sensitive-word (illegal-word) detection classes: `StringSearch`, `StringSearchEx`, `StringSearchEx2`, `WordsSearch`, `WordsSearchEx`, `WordsSearchEx2`, `IllegalWordsSearch`;

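  • `StringSearch`, `StringSearchEx`, `StringSearchEx2`, `StringSearchEx3`: `FindFirst` returns a `string`.
  • `WordsSearch`, `WordsSearchEx`, `WordsSearchEx2`, `WordsSearchEx3`: `FindFirst` returns a `WordsSearchResult`;
    besides the keyword, a `WordsSearchResult` also carries the start and end positions of the match.
  • `IllegalWordsSearch`: the sensitive-word filter with skip words, a blacklist and optional case-insensitive matching;
    `FindFirst` returns an `IllegalWordsSearchResult`.
  • `IllegalWordsSearch`, `StringSearchEx`, `StringSearchEx2`, `WordsSearchEx`, `WordsSearchEx2` support `Save` and `Load`, which can speed up initialization.
  • Common methods: `SetKeywords`, `ContainsAny`, `FindFirst`, `FindAll`, `Replace`.
  • Methods specific to `IllegalWordsSearch`: `SetSkipWords` (set the skip words) and `SetBlacklist` (set the blacklist).
  • `IllegalWordsSearch` field `UseIgnoreCase`: controls whether case is ignored; it must be set before calling `SetKeywords`, and it has no effect when `Load` is used.
  • `StringSearchEx3`, `WordsSearchEx3`: pointer-optimized versions; their measured performance fluctuates considerably.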
    string s = "中国|国人|zg人";
    string test = "我是中国人";

    StringSearch iwords = new StringSearch();
    iwords.SetKeywords(s.Split('|'));
    
    var b = iwords.ContainsAny(test);
    Assert.AreEqual(true, b);

    var f = iwords.FindFirst(test);
    Assert.AreEqual("中国", f);

    var all = iwords.FindAll(test);
    Assert.AreEqual("中国", all[0]);
    Assert.AreEqual("国人", all[1]);
    Assert.AreEqual(2, all.Count);

    var str = iwords.Replace(test, '*');
    Assert.AreEqual("我是***", str);

()

Sensitive-word (illegal-word) matching classes: `StringMatch`, `StringMatchEx`, `WordsMatch`, `WordsMatchEx`.

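Supported regex-like syntax: `.` (dot), `?` (question mark), `[]` (square brackets), `(|)` (parentheses with a vertical bar).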

    string s = ".[中美]国|国人|zg人";
    string test = "我是中国人";

    WordsMatch wordsSearch = new WordsMatch();
    wordsSearch.SetKeywords(s.Split('|'));

    var b = wordsSearch.ContainsAny(test);
    Assert.AreEqual(true, b);

    var f = wordsSearch.FindFirst(test);
    Assert.AreEqual("是中国", f.Keyword);

    var alls = wordsSearch.FindAll(test);
    Assert.AreEqual("是中国", alls[0].Keyword);
    Assert.AreEqual(".[中美]国", alls[0].MatchKeyword);
    Assert.AreEqual(1, alls[0].Start);
    Assert.AreEqual(3, alls[0].End);
    Assert.AreEqual(0, alls[0].Index);// Index of the matched keyword, starting from 0
    Assert.AreEqual("国人", alls[1].Keyword);
    Assert.AreEqual(2, alls.Count);

    var t = wordsSearch.Replace(test, '*');
    Assert.AreEqual("我****", t);

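    // Convert to Simplified Chinese
    WordsHelper.ToSimplifiedChinese("我愛中國");
    WordsHelper.ToSimplifiedChinese("我愛中國",1);// Hong Kong/Macau Traditional -> Simplified
    WordsHelper.ToSimplifiedChinese("我愛中國",2);// Taiwan Traditional -> Simplified
    // Convert to Traditional Chinese
    WordsHelper.ToTraditionalChinese("我爱中国");
    WordsHelper.ToTraditionalChinese("我爱中国",1);// Simplified -> Hong Kong/Macau Traditional
    WordsHelper.ToTraditionalChinese("我爱中国",2);// Simplified -> Taiwan Traditional
    // Convert to full-width characters
    WordsHelper.ToSBC("abcABC123");
    // Convert to half-width characters
    WordsHelper.ToDBC("ａｂｃＡＢＣ１２３");
    // Convert a number to Chinese uppercase currency text
    WordsHelper.ToChineseRMB(12345678901.12);
    // Convert Chinese uppercase currency text to a number
    WordsHelper.ToNumber("壹佰贰拾叁亿肆仟伍佰陆拾柒万捌仟玖佰零壹元壹角贰分");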
    // Get the full pinyin
    WordsHelper.GetPinyin("我爱中国");//WoAiZhongGuo
    WordsHelper.GetPinyin("我爱中国",",");//Wo,Ai,Zhong,Guo
    WordsHelper.GetPinyin("我爱中国",true);//WǒÀiZhōngGuó

    // Get the first letter of each syllable
    WordsHelper.GetFirstPinyin("我爱中国");//WAZG
    // Get all pinyin readings of a character
    WordsHelper.GetAllPinyin('传');//Chuan,Zhuan
    // Get the pinyin of a personal name
    WordsHelper.GetPinyinForName("单一一");//ShanYiYi
    WordsHelper.GetPinyinForName("单一一",",");//Shan,Yi,Yi
    WordsHelper.GetPinyinForName("单一一",true);//ShànYīYī

ToolGood.Words.Pinyin C#

`PinyinMatch`: methods `SetKeywords`, `SetIndexs`, `Find`, `FindIndex`.

`PinyinMatch<T>`: methods `SetKeywordsFunc`, `SetPinyinFunc`, `SetPinyinSplitChar`, `Find`.

    string s = "北京|天津|河北|辽宁|吉林|黑龙江|山东|江苏|上海|浙江|安徽|福建|江西|广东|广西|海南|河南|湖南|湖北|山西|内蒙古|宁夏|青海|陕西|甘肃|新疆|四川|贵州|云南|重庆|西藏|香港|澳门|台湾";

    PinyinMatch match = new PinyinMatch();
    match.SetKeywords(s.Split('|').ToList());

    var all = match.Find("BJ");
    Assert.AreEqual("北京", all[0]);
    Assert.AreEqual(1, all.Count);

    all = match.Find("北J");
    Assert.AreEqual("北京", all[0]);
    Assert.AreEqual(1, all.Count);

    all = match.Find("北Ji");
    Assert.AreEqual("北京", all[0]);
    Assert.AreEqual(1, all.Count);

    all = match.Find("S");
    Assert.AreEqual("山东", all[0]);
    Assert.AreEqual("江苏", all[1]);

    var all2 = match.FindIndex("BJ");
    Assert.AreEqual(0, all2[0]);
    Assert.AreEqual(1, all2.Count);

10

:C#StringSearchEx2.ContainsAny``Regex.IsMatch8.8

Regex.Matches``IQueryable``MatchCollection,

Find All,

FastFilter7

StringSearch14

Regex.Matches3msRegex.Matches

Regex.Matches11

Lua

wenlifan https://github.com/wenlifan/SensitiveWordFilter

ToolGood WindowsLinux100M

https://toolgood.com/

https://github.com/toolgood/ToolGood.TextFilter

Q128994346

1

2

3

4

5ToolGood.Words 30KFC

6ToolGood.TextFilter 300 IllegalWordsSearchToolGood.TextFilter

7DFAC#JAVA 30KFC ToolGood.TextFilterDFA

8C#30KFC

(3G)25

50

500