def log_query_features(query: str, query_name: str) -> None:
    """Log which of the features listed in ``UNUSED_SQS_FLAGS`` a query uses.

    The query is searched with the regular expression of every
    ``(flag, pattern)`` pair in ``UNUSED_SQS_FLAGS``. If at least one pattern
    matches, a single info-level entry is logged containing the query, its
    name and the flags whose patterns matched. If nothing matches, nothing is
    logged.

    :param query: the query string to inspect
    :param query_name: label recorded in the log entry to identify where the
        query came from; callers in this module pass ``"q"`` or ``"field"``
    """
    # ``re.search`` returns a match object or ``None``, so its truthiness is
    # enough to decide whether the feature is present.
    query_flags = [
        flag for flag, pattern in UNUSED_SQS_FLAGS if re.search(pattern, query)
    ]
    if not query_flags:
        return

    log.info(
        {
            "log_message": "Special features present in query",
            "query_name": query_name,
            "query": query,
            "flags": query_flags,
        }
    )
test_create_search_query_q_search_with_filters(image_media_type_config): "default_operator": "AND", "fields": ["title", "description", "tags.name"], "query": "cat", + "flags": DEFAULT_SQS_FLAGS, } } ], @@ -161,6 +167,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config): "boost": 10000, "fields": ["title"], "query": "cat", + "flags": DEFAULT_SQS_FLAGS, } }, {"rank_feature": {"boost": 10000, "field": "standardized_popularity"}}, @@ -188,10 +195,23 @@ def test_create_search_query_non_q_query(image_media_type_config): "simple_query_string": { "fields": ["creator"], "query": "Artist From Openverse", + "flags": DEFAULT_SQS_FLAGS, + } + }, + { + "simple_query_string": { + "fields": ["title"], + "query": "kitten🐱", + "flags": DEFAULT_SQS_FLAGS, + } + }, + { + "simple_query_string": { + "fields": ["tags.name"], + "query": "cute", + "flags": DEFAULT_SQS_FLAGS, } }, - {"simple_query_string": {"fields": ["title"], "query": "kitten🐱"}}, - {"simple_query_string": {"fields": ["tags.name"], "query": "cute"}}, ], "should": [ {"rank_feature": {"boost": 10000, "field": "standardized_popularity"}}, diff --git a/frontend/src/locales/scripts/en.json5 b/frontend/src/locales/scripts/en.json5 index deaf2a6b816..494d33b9f9c 100644 --- a/frontend/src/locales/scripts/en.json5 +++ b/frontend/src/locales/scripts/en.json5 @@ -177,70 +177,14 @@ title: "Search for an exact match", ariaLabel: "quote unquote Claude Monet", claudeMonet: '"Claude Monet"', - content: "Put a word or phrase inside quotes. 
For example, {link}.", - }, - combine: { - title: "Combining terms", - description: "If you want to combine terms, you can use the following operators to perform more complex queries", - and: "{symbol} signifies AND operation", - or: "{symbol} signifies OR operation", - not: "{symbol} negates a single token", - prefix: "{symbol} at the end of a term signifies a prefix query", - precedence: "{symbol} signify precedence", - fuzziness: "{symbol} after a word signifies edit distance (fuzziness)", - ariaLabels: { - fuzziness: "tilde N", - open: "open parenthesis", - close: "close parenthesis", - star: "star symbol", - not: "minus symbol", - and: "plus symbol", - or: "vertical bar symbol", - prefix: "asterisk symbol", - verticalBar: "vertical bar symbol", - precedence: "parentheses", - }, - }, - example: { - and: { - description: "Example: {link}{br} This will search for images related to both dog and cat.", - ariaLabel: "dog plus cat", - example: "dog+cat", - }, - or: { - description: "Example: {link}{br} This will search for images related to dog or cat, but not necessarily both.", - ariaLabel: "dog vertical bar cat", - example: "dog|cat", - }, - negate: { - description: "You can use the {operator} to exclude a search term from the results.", - operatorName: "operator (signifies NOT)", - operatorAriaLabel: "minus operator (signifies NOT)", - ariaLabel: "dog minus pug", - example: "dog -pug", - content: "Example: {link}{br} This will search for images related to dog but won't include results related to 'pug'", - }, - prefix: { - description: "You can use the {operatorName} to mark a prefix term. This will match anything after the *.", - operatorName: "operator (wildcard)", - operatorAriaLabel: "star operator (wildcard)", - ariaLabel: "net star symbol", - example: "net*", - content: "Example: {link}{br} This will search for images matching anything with 'net'. 
This might include 'network', 'Netflix', 'Netherlands', etc.", - }, - precedence: { - description: "You can use parentheses {highlight} to specify precedence of terms or combine more complex queries.", - ariaLabel: "dogs plus open parenthesis corgis vertical bar labrador close parenthesis", - example: "dogs + (corgis | labrador)", - content: "Example: {link}{br} This will search for images that match dogs that are either corgis or labrador.", - }, - fuzziness: { - description: "You can use {highlight} to specify some fuzzy logic to the term according to the {link} — the number of one character changes that need to be made to one string to make it the same as another string.", - linkText: "Levenshtein Edit Distance", - ariaLabel: "theatre tilde 1", - example: "theatre~1", - content: "Example: {link}{br} This will search for images that match strings close to the term 'theatre' with a difference of one character. Results might include terms with different spellings like 'theater'.", - }, + content: "To search for an exact word or phrase, put it inside quotes. For example, {link}.", + }, + negate: { + title: "Excluding terms", + operatorName: "minus operator", + ariaLabel: "dog minus pug", + example: "dog -pug", + content: 'To exclude a term from your results, put the {operator} in front of it. Example: {link}{br} This will search for media related to "dog" but won\'t include results related to "pug".', }, }, feedback: { diff --git a/frontend/src/pages/search-help.vue b/frontend/src/pages/search-help.vue index e013d1ff2ed..8b43607ee31 100644 --- a/frontend/src/pages/search-help.vue +++ b/frontend/src/pages/search-help.vue @@ -15,145 +15,28 @@ -
{{ $t("searchGuide.combine.description") }}
-{{ operator.symbol }}
-
-