From e090fa3c765377ec7116e2a54d7c7b972e9fb1db Mon Sep 17 00:00:00 2001 From: Adam Collins Date: Mon, 17 Feb 2025 09:39:58 +1000 Subject: [PATCH 1/2] #945 fix queries with values containing colons --- .../au/org/ala/biocache/util/QueryFormatUtils.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java b/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java index 35bde6431..f4fa720a5 100644 --- a/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java +++ b/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java @@ -979,7 +979,7 @@ public String formatString(String text, boolean isQuery) { // Queries containing OR, AND or Intersects( must already be correctly escaped for SOLR // Note: if escaping is required, extract expressions from nested () [] "" for escaping with formatString. - if (isQuery && text.contains(" OR ") || text.contains(" AND ") || text.contains("Intersects(")) return text; + if (isQuery && (text.contains(" OR ") || text.contains(" AND ") || text.contains("Intersects("))) return text; try { String formatted = ""; @@ -1036,13 +1036,11 @@ public String formatString(String text, boolean isQuery) { extractedValue = extractedValue.substring(0, extractedValue.indexOf(" OR ")); } - // search for term in the extractedValue and clip - // NOTE: the if the quoted term value contains content that looks like a term "name" then it will be - // treated as a new term. - Matcher termMatcher = termPattern.matcher(extractedValue); - if (termMatcher.find()) { - extractedValue = extractedValue.substring(0, termMatcher.start()); - } + // Note: Removed some code that was intended to remove characters from the beginning of the + // extractedValue that were not part of the term's value. This was causing issues with + // quoted values that contained colons. 
On review, no use case was found for the need + // to remove and discard characters from the beginning of the extractedValue. The entire + // QueryFormatUtils should contain an actual parser instead of this legacy code. // below code fragment extracts the filter value and try to format for solr query or get display value // &fq = taxon_name:""Cyclophora"+lechriostropha" From 820f40496008f582b7d25e7efff9a6a14574969f Mon Sep 17 00:00:00 2001 From: Adam Collins Date: Mon, 17 Feb 2025 18:14:44 +1000 Subject: [PATCH 2/2] #945 fix queries with values containing colons --- .../au/org/ala/biocache/util/QueryFormatUtils.java | 13 ++++++++----- .../org/ala/biocache/dao/FilterQueryParserTest.java | 1 - .../au/org/ala/biocache/dao/QueryFormatTest.java | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java b/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java index f4fa720a5..061993409 100644 --- a/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java +++ b/src/main/java/au/org/ala/biocache/util/QueryFormatUtils.java @@ -1036,11 +1036,14 @@ public String formatString(String text, boolean isQuery) { extractedValue = extractedValue.substring(0, extractedValue.indexOf(" OR ")); } - // Note: Removed some code that was intended to remove characters from the beginning of the - // extractedValue that were not part of the term's value. This was causing issues with - // quoted values that contained colons. On review, no use case was found for the need - // to remove and discard characters from the beginning of the extractedValue. The entire - // QueryFormatUtils should contain an actual parser instead of this legacy code. + // search for term in the extractedValue and clip, this will be after the second unescaped ". + // Note that it has already been confirmed that the first character is an unescaped double quote. 
+ for (int i = 1; i < extractedValue.length(); i++) { + if (extractedValue.charAt(i) == '"' && extractedValue.charAt(i - 1) != '\\') { + extractedValue = extractedValue.substring(0, i); + break; + } + } // below code fragment extracts the filter value and try to format for solr query or get display value // &fq = taxon_name:""Cyclophora"+lechriostropha" diff --git a/src/test/java/au/org/ala/biocache/dao/FilterQueryParserTest.java b/src/test/java/au/org/ala/biocache/dao/FilterQueryParserTest.java index 920485f31..c3e1c405c 100644 --- a/src/test/java/au/org/ala/biocache/dao/FilterQueryParserTest.java +++ b/src/test/java/au/org/ala/biocache/dao/FilterQueryParserTest.java @@ -33,7 +33,6 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.Assert.*; -//TODO: update hubs, then remove fqs[0].substring(0, fqs[0].indexOf(':')) @RunWith(MockitoJUnitRunner.class) public class FilterQueryParserTest { diff --git a/src/test/java/au/org/ala/biocache/dao/QueryFormatTest.java b/src/test/java/au/org/ala/biocache/dao/QueryFormatTest.java index 7456e0b13..615f925ce 100644 --- a/src/test/java/au/org/ala/biocache/dao/QueryFormatTest.java +++ b/src/test/java/au/org/ala/biocache/dao/QueryFormatTest.java @@ -180,6 +180,7 @@ public static SearchQueryTester[] data() { new SearchQueryTester("spatial_list:dr123", "", "", false), new SearchQueryTester("month:03 month:04", "month:03 month:04", "Month:March Month:April", true), new SearchQueryTester("month:\"03\" month:\"04\"", "month:\"03\" month:\"04\"", "Month:\"March\" Month:\"April\"", true), + new SearchQueryTester("collection:\"ABC:def:123:456\"", "collection:\"ABC\\:def\\:123\\:456\"", "collection:\"ABC:def:123:456\"", true), }; }