From 06592367ce4c00346fbc0ccd39c33aa6fc8430b8 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Mon, 15 Apr 2024 12:11:14 +0300 Subject: [PATCH] Show the unstable params and improve collection docs (#4085) Signed-off-by: Olga Bulat --- api/api/constants/search.py | 2 + api/api/docs/audio_docs.py | 8 ++- api/api/docs/base_docs.py | 23 +++++-- api/api/docs/image_docs.py | 9 ++- api/api/serializers/docs.py | 27 +++++--- api/api/serializers/media_serializers.py | 79 +++++++++--------------- 6 files changed, 83 insertions(+), 65 deletions(-) diff --git a/api/api/constants/search.py b/api/api/constants/search.py index 4325c3a96f0..3cb81c652e1 100644 --- a/api/api/constants/search.py +++ b/api/api/constants/search.py @@ -3,3 +3,5 @@ SEARCH_STRATEGIES = ["search", "collection"] SearchStrategy = Literal["search", "collection"] + +COLLECTIONS = ["tag", "source", "creator"] diff --git a/api/api/docs/audio_docs.py b/api/api/docs/audio_docs.py index 06f5d573c3c..ecb27aa082f 100644 --- a/api/api/docs/audio_docs.py +++ b/api/api/docs/audio_docs.py @@ -6,7 +6,9 @@ from drf_spectacular.utils import OpenApiResponse, extend_schema +from api.constants.parameters import COLLECTION, TAG from api.docs.base_docs import ( + NON_FILTER_FIELDS, SEARCH_DESCRIPTION, custom_extend_schema, fields_to_md, @@ -40,11 +42,15 @@ serializer = AudioSearchRequestSerializer(context={"media_type": "audio"}) -audio_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"]) +audio_filter_fields = fields_to_md( + [f for f in serializer.field_names if f not in NON_FILTER_FIELDS] +) audio_search_description = SEARCH_DESCRIPTION.format( filter_fields=audio_filter_fields, media_type="audio files", + collection_param=COLLECTION, + tag_param=TAG, ) search = custom_extend_schema( diff --git a/api/api/docs/base_docs.py b/api/api/docs/base_docs.py index 96e3f6cee83..bbdad3d9891 100644 --- a/api/api/docs/base_docs.py +++ b/api/api/docs/base_docs.py @@ -15,6 +15,7 @@ ) from api.constants.media_types import MediaType +from api.constants.parameters import COLLECTION, TAG def fields_to_md(field_names): @@ -141,7 +142,7 @@ def build_source_path_parameter(media_type: MediaType): ) SEARCH_DESCRIPTION_DEFAULT = """ -Return audio files that match the query. +Return {media_type} that match the query. This endpoint allows you to search within specific fields, or to retrieve a collection of all {media_type} from a specific source, creator or tag. @@ -169,11 +170,11 @@ def build_source_path_parameter(media_type: MediaType): ### Collection search The collection search allows to retrieve a collection of media from a specific source, -creator or tag. The `collection` parameter is used to specify the type of collection to retrieve. +creator or tag. The `{collection_param}` parameter is used to specify the type of collection to retrieve. -- `collection=tag&tag=tagName` will return the media with tag `tagName`. -- `collection=source&source=sourceName` will return the media from source `sourceName`. -- `collection=creator&creator=creatorName` will return the media by creator `creatorName`. +- `{collection_param}=tag&{tag_param}=tagName` will return the media with tag `tagName`. +- `{collection_param}=source&source=sourceName` will return the media from source `sourceName`. +- `{collection_param}=creator&creator=creatorName&source=sourceName` will return the media by creator `creatorName` at `sourceName`. Collection results are sorted by the time they were added to Openverse, with the most recent additions appearing first. The filters such as `license` are not available for collections. @@ -201,3 +202,15 @@ def build_source_path_parameter(media_type: MediaType): if settings.SHOW_COLLECTION_DOCS else SEARCH_DESCRIPTION_COLLECTIONS_DISABLED ) + +NON_FILTER_FIELDS = [ + "q", + TAG, + COLLECTION, + "page", + "page_size", + "unstable__sort_by", + "unstable__sort_dir", + "unstable__authority", + "unstable__authority_boost", +] diff --git a/api/api/docs/image_docs.py b/api/api/docs/image_docs.py index 5770f02c794..8d3dca24ae8 100644 --- a/api/api/docs/image_docs.py +++ b/api/api/docs/image_docs.py @@ -6,7 +6,9 @@ from drf_spectacular.utils import OpenApiResponse, extend_schema +from api.constants.parameters import COLLECTION, TAG from api.docs.base_docs import ( + NON_FILTER_FIELDS, SEARCH_DESCRIPTION, custom_extend_schema, fields_to_md, @@ -42,11 +44,16 @@ serializer = ImageSearchRequestSerializer(context={"media_type": "image"}) -image_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"]) +image_filter_fields = fields_to_md( + [f for f in serializer.field_names if f not in NON_FILTER_FIELDS] +) + image_search_description = SEARCH_DESCRIPTION.format( filter_fields=image_filter_fields, media_type="images", + collection_param=COLLECTION, + tag_param=TAG, ) search = custom_extend_schema( diff --git a/api/api/serializers/docs.py b/api/api/serializers/docs.py index 5c333f2b6cd..7913bc63a5d 100644 --- a/api/api/serializers/docs.py +++ b/api/api/serializers/docs.py @@ -1,6 +1,13 @@ from django.conf import settings -from api.constants.parameters import TAG +from api.constants.parameters import COLLECTION, TAG + + +UNSTABLE_WARNING = """ +\n\n_Caution: Parameters prefixed with `unstable__` are experimental and +may change or be removed without notice in future updates. Use them +with caution as they are not covered by our API versioning policy._\n\n +""" CREATOR_COLLECTIONS_DISABLED = """ @@ -10,20 +17,20 @@ the words in the value will match. To search for several values, join them with a comma.""" -CREATOR = """ +CREATOR = f""" _When `q` parameter is present, `creator` parameter is ignored._ **Creator collection** -When used with `collection=creator&source=sourceName`, returns the collection of media +When used with `{COLLECTION}=creator&source=sourceName`, returns the collection of media by the specified creator. Notice that a single creator's media items can be found on several sources, but this collection only returns the items from the specified source. This is why for this collection, both the creator and the source parameters are required, and matched exactly. For a fuzzy creator search, -use the default search without the `collection` parameter. +use the default search without the `{COLLECTION}` parameter. **Creator search** -When used without the `collection` parameter, will search in the creator field only. +When used without the `{COLLECTION}` parameter, will search in the creator field only. The search is fuzzy, so `creator=john` will match any value that includes the word `john`. If the value contains space, items that contain any of the words in the value will match. To search for several values, @@ -34,9 +41,10 @@ CREATOR if settings.SHOW_COLLECTION_DOCS else CREATOR_COLLECTIONS_DISABLED ) COLLECTION_HELP_TEXT = f""" +{UNSTABLE_WARNING} The kind of media collection to return. -Should be used with `{TAG}`, `source` or `creator`+`source`""" +Must be used with `{TAG}`, `source` or `creator`+`source`""" EXCLUDED_SOURCE_HELP_TEXT = """ A comma separated list of data sources to exclude from the search. @@ -48,7 +56,7 @@ SOURCE = """ For default search, a comma separated list of data sources. -When the `collection` parameter is used, this parameter only accepts a single source. +When the `{collection_param}` parameter is used, this parameter only accepts a single source. Valid values are `source_name`s from the stats endpoint: {origin}/v1/{media_path}/stats/. """ @@ -57,8 +65,9 @@ SOURCE if settings.SHOW_COLLECTION_DOCS else SOURCE_HELP_TEXT_COLLECTIONS_DISABLED ) -TAG_HELP_TEXT = """ -_Must be used with `collection=tag`_ +TAG_HELP_TEXT = f""" +{UNSTABLE_WARNING} +_Must be used with `{COLLECTION}=tag`_ Get the collection of media with a specific tag. Returns the collection of media that has the specified tag, matching exactly and entirely. diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py index f74be03921b..dc624951af0 100644 --- a/api/api/serializers/media_serializers.py +++ b/api/api/serializers/media_serializers.py @@ -17,6 +17,7 @@ from api.constants.licenses import LICENSE_GROUPS from api.constants.media_types import MediaType from api.constants.parameters import COLLECTION, TAG +from api.constants.search import COLLECTIONS from api.constants.sorting import DESCENDING, RELEVANCE, SORT_DIRECTIONS, SORT_FIELDS from api.controllers import search_controller from api.models.media import AbstractMedia @@ -27,6 +28,7 @@ EXCLUDED_SOURCE_HELP_TEXT, SOURCE_HELP_TEXT, TAG_HELP_TEXT, + UNSTABLE_WARNING, ) from api.serializers.fields import SchemableHyperlinkedIdentityField from api.utils.help_text import make_comma_separated_help_text @@ -97,22 +99,11 @@ def validate_page_size(self, value): return value -EXCLUDED_COLLECTION_REQUEST_FIELDS = ( - [] if settings.SHOW_COLLECTION_DOCS else [COLLECTION, TAG] -) - - @extend_schema_serializer( - # Hide unstable and internal fields from documentation. + # Hide internal fields from documentation. # Also see `field_names` below. exclude_fields=[ - "unstable__sort_by", - "unstable__sort_dir", - "unstable__authority", - "unstable__authority_boost", - "unstable__include_sensitive_results", "internal__index", - *EXCLUDED_COLLECTION_REQUEST_FIELDS, ], ) class MediaSearchRequestSerializer(PaginatedRequestSerializer): @@ -133,28 +124,18 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): "license_type", "creator", "tags", - # TODO: Uncomment after https://github.com/WordPress/openverse/issues/3919 - # "collection", - # "tag", + COLLECTION, + TAG, "title", "filter_dead", "extension", "mature", - # Excluded unstable fields, also see `exclude_fields` above. - # "unstable__sort_by", - # "unstable__sort_dir", - # "unstable__authority", - # "unstable__authority_boost", - # "unstable__include_sensitive_results", + "unstable__sort_by", + "unstable__sort_dir", + "unstable__authority", + "unstable__authority_boost", + "unstable__include_sensitive_results", ] - # TODO: Remove after https://github.com/WordPress/openverse/issues/3919 - if settings.SHOW_COLLECTION_DOCS: - field_names.extend( - [ - TAG, - COLLECTION, - ] - ) field_names.extend(PaginatedRequestSerializer.field_names) """ Keep the fields names in sync with the actual fields below as this list is @@ -199,6 +180,20 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): required=False, max_length=200, ) + unstable__collection = serializers.ChoiceField( + source="collection", + label="collection", + choices=COLLECTIONS, + help_text=COLLECTION_HELP_TEXT, + required=False, + ) + unstable__tag = serializers.CharField( + label="tag", + source="tag", + help_text=TAG_HELP_TEXT, + required=False, + max_length=200, + ) license = serializers.CharField( label="licenses", help_text=make_comma_separated_help_text(LICENSE_GROUPS["all"], "licenses"), @@ -236,14 +231,14 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): # - validators for these fields in ``MediaSearchRequestSerializer`` unstable__sort_by = serializers.ChoiceField( source="sort_by", - help_text="The field which should be the basis for sorting results.", + help_text=f"{UNSTABLE_WARNING}The field which should be the basis for sorting results.", choices=SORT_FIELDS, required=False, default=RELEVANCE, ) unstable__sort_dir = serializers.ChoiceField( source="sort_dir", - help_text="The direction of sorting. Cannot be applied when sorting by " + help_text=f"{UNSTABLE_WARNING}The direction of sorting. Cannot be applied when sorting by " "`relevance`.", choices=SORT_DIRECTIONS, required=False, @@ -251,14 +246,14 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): ) unstable__authority = serializers.BooleanField( label="authority", - help_text="If enabled, the search will add a boost to results that are " + help_text=f"{UNSTABLE_WARNING}If enabled, the search will add a boost to results that are " "from authoritative sources.", required=False, default=False, ) unstable__authority_boost = serializers.FloatField( label="authority_boost", - help_text="The boost coefficient to apply to authoritative sources, " + help_text=f"{UNSTABLE_WARNING}The boost coefficient to apply to authoritative sources, " "multiplied with the popularity boost.", required=False, default=1.0, @@ -268,26 +263,11 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): unstable__include_sensitive_results = serializers.BooleanField( source="include_sensitive_results", label="include_sensitive_results", - help_text="Whether to include results considered sensitive.", + help_text=f"{UNSTABLE_WARNING}Whether to include results considered sensitive.", required=False, default=False, ) - unstable__tag = serializers.CharField( - label="tag", - source="tag", - help_text=TAG_HELP_TEXT, - required=False, - max_length=200, - ) - unstable__collection = serializers.ChoiceField( - source="collection", - label="collection", - choices=["tag", "source", "creator"], - help_text=COLLECTION_HELP_TEXT, - required=False, - ) - # The ``internal__`` prefix is used in the query params. # If you rename these fields, update the following references: # - ``field_names`` in ``MediaSearchRequestSerializer`` @@ -317,6 +297,7 @@ def __init__(self, *args, **kwargs): variables = { "origin": settings.CANONICAL_ORIGIN, "media_path": media_path, + "collection_param": COLLECTION, } self.fields["source"].help_text = SOURCE_HELP_TEXT.format(**variables)