@@ -552,34 +552,7 @@ private WildcardQuery CalculateFieldWeight(string fieldName, string searchTerm,
552
552
/// <returns>The Analyzer to use for the given language.</returns>
553
553
// Builds the Analyzer for a language code such as "en-US" or "de-DE".
internal Analyzer CreateAnalyzerByLanguage ( string language )
554
554
{
// Removed ("-") lines below: the old switch returned a separate analyzer class per locale,
// using LuceneCustomAnalyzer for "en-US", "pl-PL" and any value not listed.
555
- switch ( language )
556
- {
557
- case "en-US" :
558
- return new LuceneCustomAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
559
- case "cs-CZ" :
560
- return new CzechAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
561
- case "de-DE" :
562
- return new GermanAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
563
- case "es-ES" :
564
- return new SpanishAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
565
- case "fr-FR" :
566
- return new FrenchAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
567
- case "it-IT" :
568
- return new ItalianAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
569
- case "ja-JP" :
570
- case "ko-KR" :
571
- case "zh-CN" :
572
- case "zh-TW" :
573
- return new CJKAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
574
- case "pl-PL" :
575
- return new LuceneCustomAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
576
- case "pt-BR" :
577
- return new BrazilianAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
578
- case "ru-RU" :
579
- return new RussianAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
580
- default :
581
- return new LuceneCustomAnalyzer ( LuceneConfig . LuceneNetVersion ) ;
582
- }
// Added ("+") line below: every language now gets a LuceneCustomAnalyzer, and the language
// string is passed to its constructor instead of being switched on here.
// NOTE(review): the per-locale analyzer classes are no longer instantiated by this method;
// confirm nothing they did beyond what LuceneCustomAnalyzer reproduces was relied upon.
555
+ return new LuceneCustomAnalyzer ( LuceneConfig . LuceneNetVersion , language ) ;
583
556
}
584
557
585
558
/// <summary>
@@ -663,10 +636,12 @@ internal void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
663
636
public class LuceneCustomAnalyzer : Analyzer
664
637
{
665
638
private readonly LuceneVersion luceneVersion ;
639
+ private readonly string analyzerLanguage ;
666
640
667
// Constructor: stores the Lucene version and, after this change, a language code.
// NOTE(review): the one-argument signature is replaced rather than kept as an overload,
// so callers outside this view must now pass a language; confirm they were all updated.
- public LuceneCustomAnalyzer ( LuceneVersion matchVersion )
641
+ public LuceneCustomAnalyzer ( LuceneVersion matchVersion , string language )
668
642
{
669
643
luceneVersion = matchVersion ;
644
// A null language is stored as "en-US".
+ analyzerLanguage = language ?? "en-US" ;
670
645
}
671
646
672
647
protected override TokenStreamComponents CreateComponents ( string fieldName , TextReader reader )
@@ -681,10 +656,51 @@ protected override TokenStreamComponents CreateComponents(string fieldName, Text
681
656
//Lowercase all the text
682
657
tok = new LowerCaseFilter ( luceneVersion , tok ) ;
683
658
659
+ CharArraySet languageSet = StopAnalyzer . ENGLISH_STOP_WORDS_SET ;
660
+
661
+
662
+ switch ( analyzerLanguage )
663
+ {
664
+ case "cs-CZ" :
665
+ languageSet = CzechAnalyzer . DefaultStopSet ;
666
+ break ;
667
+ case "de-DE" :
668
+ languageSet = GermanAnalyzer . DefaultStopSet ;
669
+ break ;
670
+ case "es-ES" :
671
+ languageSet = SpanishAnalyzer . DefaultStopSet ;
672
+ break ;
673
+ case "fr-FR" :
674
+ languageSet = FrenchAnalyzer . DefaultStopSet ;
675
+ break ;
676
+ case "it-IT" :
677
+ languageSet = ItalianAnalyzer . DefaultStopSet ;
678
+ break ;
679
+ case "ja-JP" :
680
+ case "ko-KR" :
681
+ case "zh-CN" :
682
+ case "zh-TW" :
683
+ languageSet = CJKAnalyzer . DefaultStopSet ;
684
+ break ;
685
+ case "pl-PL" :
686
+ languageSet = StopAnalyzer . ENGLISH_STOP_WORDS_SET ;
687
+ break ;
688
+ case "pt-BR" :
689
+ languageSet = BrazilianAnalyzer . DefaultStopSet ;
690
+ break ;
691
+ case "ru-RU" :
692
+ languageSet = RussianAnalyzer . DefaultStopSet ;
693
+ break ;
694
+ default :
695
+ languageSet = StopAnalyzer . ENGLISH_STOP_WORDS_SET ;
696
+ break ;
697
+
698
+ }
699
+
684
700
//List of stopwords that will be removed by the StopFilter like "a", "an", "and", "are", "as", "at", "be", "but", "by"
685
701
CharArraySet stopWords = new CharArraySet ( luceneVersion , 1 , true )
686
702
{
687
- StopAnalyzer . ENGLISH_STOP_WORDS_SET ,
703
+ languageSet
688
704
} ;
689
705
690
706
tok = new StopFilter ( LuceneConfig . LuceneNetVersion , tok , stopWords ) ;
0 commit comments