diff --git a/pom.xml b/pom.xml
index 0be044e..6c87176 100644
--- a/pom.xml
+++ b/pom.xml
@@ -212,6 +212,12 @@
9.12.0
+
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analysis-icu</artifactId>
+ <version>9.12.0</version>
+ </dependency>
@@ -312,11 +318,11 @@
${project.build.directory}/jacoco
-
+
true
false
true
-
+
csw/korea/festival/main/common/annotation/**
diff --git a/src/main/java/csw/korea/festival/main/config/lucene/HibernateSearchConfig.java b/src/main/java/csw/korea/festival/main/config/lucene/HibernateSearchConfig.java
index 9bffde9..172879f 100644
--- a/src/main/java/csw/korea/festival/main/config/lucene/HibernateSearchConfig.java
+++ b/src/main/java/csw/korea/festival/main/config/lucene/HibernateSearchConfig.java
@@ -7,11 +7,16 @@
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory;
import org.apache.lucene.analysis.en.PorterStemFilterFactory;
+import org.apache.lucene.analysis.icu.ICUNormalizer2FilterFactory;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerFactory;
import org.apache.lucene.analysis.ko.KoreanNumberFilterFactory;
import org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilterFactory;
import org.apache.lucene.analysis.ko.KoreanReadingFormFilterFactory;
import org.apache.lucene.analysis.ko.KoreanTokenizerFactory;
+import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;
+import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -30,28 +35,53 @@ public LuceneAnalysisConfigurer luceneAnalysisConfigurer() {
.tokenizer(StandardTokenizerFactory.class)
.charFilter(HTMLStripCharFilterFactory.class)
.tokenFilter(LowerCaseFilterFactory.class)
+ .tokenFilter(SynonymGraphFilterFactory.class)
+ .param("synonyms", "lucene/english_synonyms.txt")
+ .param("ignoreCase", "true")
+ .param("expand", "true")
+
+ .tokenFilter(EdgeNGramFilterFactory.class)
+ .param("minGramSize", "3")
+ .param("maxGramSize", "10")
.tokenFilter(EnglishPossessiveFilterFactory.class)
+
.tokenFilter(StopFilterFactory.class)
- // let it use the default EnglishAnalyzer stopwords
.param("ignoreCase", "true")
.tokenFilter(PorterStemFilterFactory.class);
+
// Korean Analyzer
context.analyzer("korean").custom()
.tokenizer(KoreanTokenizerFactory.class)
.charFilter(HTMLStripCharFilterFactory.class)
.charFilter(CJKWidthCharFilterFactory.class)
+
+ .tokenFilter(LowerCaseFilterFactory.class)
+ .tokenFilter(SynonymGraphFilterFactory.class)
+ .param("synonyms", "lucene/korean_synonyms.txt")
+ .param("ignoreCase", "true")
+ .param("expand", "true")
+
.tokenFilter(KoreanReadingFormFilterFactory.class)
.tokenFilter(KoreanPartOfSpeechStopFilterFactory.class)
- // FIX ME: If adverbs are not meaningful for your searches, consider adding MAG and MAJ to the list.
+ // FIXME: If adverbs are not meaningful for searches, consider adding MAG (general adverb) and MAJ (conjunctive adverb) to the list.
.param("tags", "E,EP,EF,EC,ETN,ETM,IC,J,MM,SP,SSC,SSO,SC,SE,XPN,SF,SY,XSA,UNKNOWN")
- .tokenFilter(KoreanNumberFilterFactory.class);
+ .tokenFilter(KoreanNumberFilterFactory.class)
+
+ .tokenFilter(NGramFilterFactory.class)
+ .param("minGramSize", "2")
+ .param("maxGramSize", "5");
// Multi-lingual Analyzer
context.analyzer("multilingual").custom()
- .tokenizer(StandardTokenizerFactory.class)
+ .tokenizer(ICUTokenizerFactory.class)
.charFilter(HTMLStripCharFilterFactory.class)
- .tokenFilter(LowerCaseFilterFactory.class);
+ .tokenFilter(LowerCaseFilterFactory.class)
+ .tokenFilter(ICUNormalizer2FilterFactory.class)
+ .param("name", "nfkc_cf")
+ .tokenFilter(StopFilterFactory.class)
+ .param("ignoreCase", "true");
+// .param("words", "stopwords.txt");
try {
context.analyzer("seok").instance(new CustomKoreanAnalyzer());
diff --git a/src/main/java/csw/korea/festival/main/festival/model/Festival.java b/src/main/java/csw/korea/festival/main/festival/model/Festival.java
index ee0067c..14792b3 100644
--- a/src/main/java/csw/korea/festival/main/festival/model/Festival.java
+++ b/src/main/java/csw/korea/festival/main/festival/model/Festival.java
@@ -27,10 +27,10 @@ public class Festival {
@Column(unique = true, nullable = false)
private String festivalId; // Festival ID
- @FullTextField(analyzer = "seok")
+ @FullTextField(analyzer = "korean")
private String name; // Festival Name
- @FullTextField(analyzer = "seok")
+ @FullTextField(analyzer = "korean")
@Column(length = 1000)
private String summary; // Festival Summary
@@ -42,7 +42,7 @@ public class Festival {
@Column(name = "end_date")
private LocalDate endDate;
- @FullTextField(analyzer = "seok")
+ @FullTextField(analyzer = "korean")
private String address; // Address
private String usageFeeInfo; // Festival Usage Fee Information
diff --git a/src/main/java/csw/korea/festival/main/festival/service/FestivalSearchService.java b/src/main/java/csw/korea/festival/main/festival/service/FestivalSearchService.java
index 1035b66..75437c2 100644
--- a/src/main/java/csw/korea/festival/main/festival/service/FestivalSearchService.java
+++ b/src/main/java/csw/korea/festival/main/festival/service/FestivalSearchService.java
@@ -35,56 +35,77 @@ public class FestivalSearchService {
public FestivalPage searchFestivals(String query, int page, int size) {
SearchSession searchSession = Search.session(entityManager);
- // 1. 쿼리를 개별 단어로 분할
- // String[] terms = query.split("\\s+");
+ // 1. Split the query into individual terms
+ String[] terms = query.split("\\s+");
- String qwertyKorean = Korean.toHangul(query); // can be just normal English words but also trying to convert to Korean
+ // 2. Convert each term using qwerty to Korean conversion
+ String[] qwertyTerms = Arrays.stream(terms)
+ .map(Korean::toHangul)
+ .toArray(String[]::new);
- // 2. 부울 쿼리 구성
+ // 3. Build the boolean query dynamically
SearchResult result = searchSession.search(Festival.class)
- .where(f -> f.bool()
- .should(f.match()
- .fields("name", "nameEn")
- .matching(query)
- .boost(4.0f))
- .should(f.match()
- .fields("name", "nameEn")
- .matching(qwertyKorean)
- .boost(3.0f))
+ .where(f -> {
+ // Start a boolean predicate
+ BooleanPredicateClausesStep<?> boolQuery = f.bool();
- .should(f.match()
- .fields("summary", "summaryEn")
- .matching(query)
- .boost(2.0f))
- .should(f.match()
+ // For each term, add 'should' clauses
+ for (String term : terms) {
+ boolQuery.should(f.match()
+ .fields("name", "nameEn")
+ .matching(term)
+ .boost(9.0f));
+ boolQuery.should(f.match()
.fields("summary", "summaryEn")
- .matching(qwertyKorean)
- .boost(1.0f))
-
- .should(f.match()
+ .matching(term)
+ .boost(7.0f));
+ boolQuery.should(f.match()
.field("categoryDisplayNames")
- .matching(query)
- .boost(1.0f))
-
- .should(f.wildcard()
+ .matching(term)
+ .boost(6.0f));
+ boolQuery.should(f.wildcard()
.field("address")
- .matching(STR."*\{query}*")
- .boost(5.0f))
- .should(f.wildcard()
- .field("address")
- .matching(STR."*\{qwertyKorean}*")
- .boost(2.0f))
-
- .should(f.phrase()
- .fields("province", "city", "district", "town", "street")
- .matching(query)
- .boost(6.0f))
- .should(f.phrase()
+ .matching("*" + term + "*")
+ .boost(10.0f));
+ boolQuery.should(f.phrase()
.fields("province", "city", "district", "town", "street")
- .matching(qwertyKorean)
- .boost(3.0f))
- .minimumShouldMatchNumber(1) // at least one should clause must match for a document to be included.
- )
+ .matching(term)
+ // .slop(2)
+ .boost(15.0f));
+ }
+
+ // Same clauses for the qwerty-to-Korean converted terms (currently disabled: the loop below is commented out)
+// for (String term : qwertyTerms) {
+// boolQuery.should(f.match()
+// .fields("name", "nameEn")
+// .matching(term)
+// .boost(3.0f));
+// boolQuery.should(f.match()
+// .fields("summary", "summaryEn")
+// .matching(term)
+// .boost(1.0f));
+// boolQuery.should(f.wildcard()
+// .field("address")
+// .matching("*" + term + "*")
+// .boost(2.0f));
+// boolQuery.should(f.phrase()
+// .fields("province", "city", "district", "town", "street")
+// .matching(term)
+// .slop(2)
+// .boost(3.0f));
+// }
+
+ // 4. Apply a minimumShouldMatch constraint so that documents matching too few clauses are excluded.
+ // The constraint counts 'should' clauses (five are added per term above), not terms.
+ // int minShouldMatch = Math.max(1, terms.length);
+
+ // Require at least 50% of the 'should' clauses to match.
+ boolQuery.minimumShouldMatchPercent(50);
+
+
+ return boolQuery;
+ })
+ // .highlighter(f -> f.field("name").field("summary"))
.fetch(page * size, size);
int totalHits = (int) result.total().hitCount();
@@ -92,16 +113,17 @@ public FestivalPage searchFestivals(String query, int page, int size) {
int end = Math.min(start + size, totalHits);
List paginatedFestivals = result.hits().subList(start, end);
- // 페이지네이션된 페스티벌의 날씨 정보 가져오기
+ // Process the festivals to include weather information
paginatedFestivals = festivalWeatherService.processFestivalsWeather(paginatedFestivals);
- // 페스티벌 페이지 객체 생성
+ // Create and return the festival page object
FestivalPage festivalPage = new FestivalPage();
festivalPage.setContent(paginatedFestivals);
festivalPage.setPageNumber(page);
festivalPage.setPageSize(size);
festivalPage.setTotalElements(totalHits);
- festivalPage.setTotalPages((totalHits + size - 1) / size); // 올림 계산
+ festivalPage.setTotalPages((totalHits + size - 1) / size);
+
return festivalPage;
}
diff --git a/src/main/resources/lucene/english_synonyms.txt b/src/main/resources/lucene/english_synonyms.txt
new file mode 100644
index 0000000..8f292f5
--- /dev/null
+++ b/src/main/resources/lucene/english_synonyms.txt
@@ -0,0 +1,28 @@
+festival, fest, festa, fair, event, gala, celebration, party, gathering
+magic festival, magic festa, magic fair
+light festival, light festa, illumination festival
+lantern festival, lantern festa, lantern fair
+seaweed festival, seaweed festa, seaweed fair
+beer festival, beer festa, beer fair
+autumn festival, fall festival, autumn festa, fall festa
+winter festival, cold festival, winter festa, cold festa
+spring festival, spring festa, spring fair
+summer festival, summer festa, summer fair
+concert, performance, show, gig, recital, live show
+busking, street performance, street busking
+dance, dancing, dance performance
+art, arts, artistic performance
+cultural performance, cultural show, cultural gala
+food festival, culinary festival, food fair, cuisine festival
+food trucks, food carts, mobile kitchens
+craft market, handmade market, artisan market
+flea market, bazaar, swap meet
+photo zone, photo spot, photo area
+fireworks, fireworks show, pyrotechnics
+experience booth, experience stand, interactive booth
+handmade, handcrafted, artisanal
+celebratory performance, celebratory show
+family-friendly, family-oriented, family suitable
+kids activities, children activities, kids programs
+traditional performance, traditional show, heritage performance
+night market, night bazaar, evening market
diff --git a/src/main/resources/lucene/korean_synonyms.txt b/src/main/resources/lucene/korean_synonyms.txt
new file mode 100644
index 0000000..ff36d9f
--- /dev/null
+++ b/src/main/resources/lucene/korean_synonyms.txt
@@ -0,0 +1,30 @@
+축제, 페스티벌, 행사, 축제회, 잔치, 모임, 갈라, 파티
+매직 페스티벌, 매직 축제, 마술 축제
+불빛 축제, 빛 축제, 조명 축제
+유등 축제, 랜턴 축제, 등불 축제
+해조 축제, 김 축제, 해초 축제
+맥주 축제, 맥주 페스티벌, 맥주 잔치
+가을 축제, 추계 축제, 가을 행사
+겨울 축제, 추운 축제, 겨울 행사
+봄 축제, 봄 행사, 봄 축제회
+여름 축제, 여름 행사, 여름 축제회
+공연, 쇼, 공연회, 콘서트, 리사이틀
+버스킹, 거리 공연, 길거리 공연
+춤, 댄싱, 춤 공연, 무용
+예술, 예술 공연, 아트 공연
+문화 공연, 문화 쇼, 문화 갈라
+음식 축제, 요리 축제, 음식 행사, 요리 행사
+푸드트럭, 푸드 카트, 모바일 주방
+수공예 시장, 핸드메이드 시장, 장인 시장
+플리마켓, 벼룩 시장, 스왑 미트
+포토존, 사진 촬영 장소, 사진 스팟
+불꽃놀이, 불꽃쇼, 파이로테크닉
+체험 부스, 체험 스탠드, 인터랙티브 부스
+수제, 수공예, 아티산
+축하 공연, 축하 쇼, 축하 갈라
+가족 친화적, 가족 지향적, 가족 적합
+어린이 활동, 아이들 활동, 어린이 프로그램
+전통 공연, 전통 쇼, 유산 공연
+야시장, 야간 시장, 저녁 시장
+야외 공연, 야외 쇼, 야외 공연회
+디저트, 디저트 부스, 스위트, 스위트 부스, 먹거리
\ No newline at end of file