Skip to content

Commit f2b39ba

Browse files
DYN-7442 Search Nodes with Special Characters (DynamoDS#15513)
1 parent 0577cab commit f2b39ba

File tree

2 files changed

+47
-31
lines changed

2 files changed

+47
-31
lines changed

src/DynamoCore/PublicAPI.Unshipped.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2256,7 +2256,7 @@ Dynamo.Selection.ISelectable.IsSelected.set -> void
22562256
Dynamo.Selection.ISelectable.Select() -> void
22572257
Dynamo.Updates.BinaryVersion
22582258
Dynamo.Utilities.LuceneCustomAnalyzer
2259-
Dynamo.Utilities.LuceneCustomAnalyzer.LuceneCustomAnalyzer(Lucene.Net.Util.LuceneVersion matchVersion) -> void
2259+
Dynamo.Utilities.LuceneCustomAnalyzer.LuceneCustomAnalyzer(Lucene.Net.Util.LuceneVersion matchVersion, string language) -> void
22602260
Dynamo.Visualization.DefaultRenderPackage
22612261
Dynamo.Visualization.DefaultRenderPackage.AddLineStripVertex(double x, double y, double z) -> void
22622262
Dynamo.Visualization.DefaultRenderPackage.AddLineStripVertexColor(byte red, byte green, byte blue, byte alpha) -> void

src/DynamoCore/Utilities/LuceneSearchUtility.cs

Lines changed: 46 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -552,34 +552,7 @@ private WildcardQuery CalculateFieldWeight(string fieldName, string searchTerm,
552552
/// <returns></returns>
553553
internal Analyzer CreateAnalyzerByLanguage(string language)
554554
{
555-
switch (language)
556-
{
557-
case "en-US":
558-
return new LuceneCustomAnalyzer(LuceneConfig.LuceneNetVersion);
559-
case "cs-CZ":
560-
return new CzechAnalyzer(LuceneConfig.LuceneNetVersion);
561-
case "de-DE":
562-
return new GermanAnalyzer(LuceneConfig.LuceneNetVersion);
563-
case "es-ES":
564-
return new SpanishAnalyzer(LuceneConfig.LuceneNetVersion);
565-
case "fr-FR":
566-
return new FrenchAnalyzer(LuceneConfig.LuceneNetVersion);
567-
case "it-IT":
568-
return new ItalianAnalyzer(LuceneConfig.LuceneNetVersion);
569-
case "ja-JP":
570-
case "ko-KR":
571-
case "zh-CN":
572-
case "zh-TW":
573-
return new CJKAnalyzer(LuceneConfig.LuceneNetVersion);
574-
case "pl-PL":
575-
return new LuceneCustomAnalyzer(LuceneConfig.LuceneNetVersion);
576-
case "pt-BR":
577-
return new BrazilianAnalyzer(LuceneConfig.LuceneNetVersion);
578-
case "ru-RU":
579-
return new RussianAnalyzer(LuceneConfig.LuceneNetVersion);
580-
default:
581-
return new LuceneCustomAnalyzer(LuceneConfig.LuceneNetVersion);
582-
}
555+
return new LuceneCustomAnalyzer(LuceneConfig.LuceneNetVersion, language);
583556
}
584557

585558
/// <summary>
@@ -663,10 +636,12 @@ internal void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
663636
public class LuceneCustomAnalyzer : Analyzer
664637
{
665638
private readonly LuceneVersion luceneVersion;
639+
private readonly string analyzerLanguage;
666640

667-
public LuceneCustomAnalyzer(LuceneVersion matchVersion)
641+
public LuceneCustomAnalyzer(LuceneVersion matchVersion, string language)
668642
{
669643
luceneVersion = matchVersion;
644+
analyzerLanguage = language ?? "en-US";
670645
}
671646

672647
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
@@ -681,10 +656,51 @@ protected override TokenStreamComponents CreateComponents(string fieldName, Text
681656
//Lowercase all the text
682657
tok = new LowerCaseFilter(luceneVersion, tok);
683658

659+
CharArraySet languageSet = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
660+
661+
662+
switch (analyzerLanguage)
663+
{
664+
case "cs-CZ":
665+
languageSet = CzechAnalyzer.DefaultStopSet;
666+
break;
667+
case "de-DE":
668+
languageSet = GermanAnalyzer.DefaultStopSet;
669+
break;
670+
case "es-ES":
671+
languageSet = SpanishAnalyzer.DefaultStopSet;
672+
break;
673+
case "fr-FR":
674+
languageSet = FrenchAnalyzer.DefaultStopSet;
675+
break;
676+
case "it-IT":
677+
languageSet = ItalianAnalyzer.DefaultStopSet;
678+
break;
679+
case "ja-JP":
680+
case "ko-KR":
681+
case "zh-CN":
682+
case "zh-TW":
683+
languageSet = CJKAnalyzer.DefaultStopSet;
684+
break;
685+
case "pl-PL":
686+
languageSet = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
687+
break;
688+
case "pt-BR":
689+
languageSet = BrazilianAnalyzer.DefaultStopSet;
690+
break;
691+
case "ru-RU":
692+
languageSet = RussianAnalyzer.DefaultStopSet;
693+
break;
694+
default:
695+
languageSet = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
696+
break;
697+
698+
}
699+
684700
//List of stopwords that will be removed by the StopFilter like "a", "an", "and", "are", "as", "at", "be", "but", "by"
685701
CharArraySet stopWords = new CharArraySet(luceneVersion, 1, true)
686702
{
687-
StopAnalyzer.ENGLISH_STOP_WORDS_SET,
703+
languageSet
688704
};
689705

690706
tok = new StopFilter(LuceneConfig.LuceneNetVersion, tok, stopWords);

0 commit comments

Comments
 (0)