Skip to content

Commit

Permalink
Show the unstable params and improve collection docs (#4085)
Browse files Browse the repository at this point in the history
Signed-off-by: Olga Bulat <obulat@gmail.com>
  • Loading branch information
obulat authored Apr 15, 2024
1 parent 63af1af commit 0659236
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 65 deletions.
2 changes: 2 additions & 0 deletions api/api/constants/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@

SEARCH_STRATEGIES = ["search", "collection"]
SearchStrategy = Literal["search", "collection"]

# Accepted values of the collection request parameter; the search request
# serializer passes this list as the `choices` of its collection field.
COLLECTIONS = ["tag", "source", "creator"]
8 changes: 7 additions & 1 deletion api/api/docs/audio_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from drf_spectacular.utils import OpenApiResponse, extend_schema

from api.constants.parameters import COLLECTION, TAG
from api.docs.base_docs import (
NON_FILTER_FIELDS,
SEARCH_DESCRIPTION,
custom_extend_schema,
fields_to_md,
Expand Down Expand Up @@ -40,11 +42,15 @@


serializer = AudioSearchRequestSerializer(context={"media_type": "audio"})
audio_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"])
audio_filter_fields = fields_to_md(
[f for f in serializer.field_names if f not in NON_FILTER_FIELDS]
)

audio_search_description = SEARCH_DESCRIPTION.format(
filter_fields=audio_filter_fields,
media_type="audio files",
collection_param=COLLECTION,
tag_param=TAG,
)

search = custom_extend_schema(
Expand Down
23 changes: 18 additions & 5 deletions api/api/docs/base_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

from api.constants.media_types import MediaType
from api.constants.parameters import COLLECTION, TAG


def fields_to_md(field_names):
Expand Down Expand Up @@ -141,7 +142,7 @@ def build_source_path_parameter(media_type: MediaType):
)

SEARCH_DESCRIPTION_DEFAULT = """
Return audio files that match the query.
Return {media_type} that match the query.
This endpoint allows you to search within specific fields, or to retrieve
a collection of all {media_type} from a specific source, creator or tag.
Expand Down Expand Up @@ -169,11 +170,11 @@ def build_source_path_parameter(media_type: MediaType):
### Collection search
The collection search allows to retrieve a collection of media from a specific source,
creator or tag. The `collection` parameter is used to specify the type of collection to retrieve.
creator or tag. The `{collection_param}` parameter is used to specify the type of collection to retrieve.
- `collection=tag&tag=tagName` will return the media with tag `tagName`.
- `collection=source&source=sourceName` will return the media from source `sourceName`.
- `collection=creator&creator=creatorName` will return the media by creator `creatorName`.
- `{collection_param}=tag&{tag_param}=tagName` will return the media with tag `tagName`.
- `{collection_param}=source&source=sourceName` will return the media from source `sourceName`.
- `{collection_param}=creator&creator=creatorName&source=sourceName` will return the media by creator `creatorName` at `sourceName`.
Collection results are sorted by the time they were added to Openverse, with the most recent
additions appearing first. The filters such as `license` are not available for collections.
Expand Down Expand Up @@ -201,3 +202,15 @@ def build_source_path_parameter(media_type: MediaType):
if settings.SHOW_COLLECTION_DOCS
else SEARCH_DESCRIPTION_COLLECTIONS_DISABLED
)

# Request parameter names that are not filters. The audio and image docs
# modules drop these names from the serializer's `field_names` before
# rendering the `filter_fields` list that is interpolated into
# SEARCH_DESCRIPTION.
# NOTE(review): `unstable__include_sensitive_results` is not listed here, so it
# will be rendered among the filter fields — confirm this is intended.
NON_FILTER_FIELDS = [
"q",
TAG,
COLLECTION,
"page",
"page_size",
"unstable__sort_by",
"unstable__sort_dir",
"unstable__authority",
"unstable__authority_boost",
]
9 changes: 8 additions & 1 deletion api/api/docs/image_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from drf_spectacular.utils import OpenApiResponse, extend_schema

from api.constants.parameters import COLLECTION, TAG
from api.docs.base_docs import (
NON_FILTER_FIELDS,
SEARCH_DESCRIPTION,
custom_extend_schema,
fields_to_md,
Expand Down Expand Up @@ -42,11 +44,16 @@


serializer = ImageSearchRequestSerializer(context={"media_type": "image"})
image_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"])
image_filter_fields = fields_to_md(
[f for f in serializer.field_names if f not in NON_FILTER_FIELDS]
)


image_search_description = SEARCH_DESCRIPTION.format(
filter_fields=image_filter_fields,
media_type="images",
collection_param=COLLECTION,
tag_param=TAG,
)

search = custom_extend_schema(
Expand Down
27 changes: 18 additions & 9 deletions api/api/serializers/docs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from django.conf import settings

from api.constants.parameters import TAG
from api.constants.parameters import COLLECTION, TAG


# Markdown caution notice placed at the start of the help text of
# `unstable__`-prefixed request parameters (the collection and tag help texts
# in this module, and the unstable serializer fields that import it).
# The `\n\n` escapes surround the notice with blank lines so it is set apart
# from the help text that follows it.
UNSTABLE_WARNING = """
\n\n_Caution: Parameters prefixed with `unstable__` are experimental and
may change or be removed without notice in future updates. Use them
with caution as they are not covered by our API versioning policy._\n\n
"""


CREATOR_COLLECTIONS_DISABLED = """
Expand All @@ -10,20 +17,20 @@
the words in the value will match. To search for several values,
join them with a comma."""

CREATOR = """
CREATOR = f"""
_When `q` parameter is present, `creator` parameter is ignored._
**Creator collection**
When used with `collection=creator&source=sourceName`, returns the collection of media
When used with `{COLLECTION}=creator&source=sourceName`, returns the collection of media
by the specified creator. Notice that a single creator's media items
can be found on several sources, but this collection only returns the
items from the specified source.
This is why for this collection, both the creator and the source
parameters are required, and matched exactly. For a fuzzy creator search,
use the default search without the `collection` parameter.
use the default search without the `{COLLECTION}` parameter.
**Creator search**
When used without the `collection` parameter, will search in the creator field only.
When used without the `{COLLECTION}` parameter, will search in the creator field only.
The search is fuzzy, so `creator=john` will match any value that includes the
word `john`. If the value contains space, items that contain any of
the words in the value will match. To search for several values,
Expand All @@ -34,9 +41,10 @@
CREATOR if settings.SHOW_COLLECTION_DOCS else CREATOR_COLLECTIONS_DISABLED
)
COLLECTION_HELP_TEXT = f"""
{UNSTABLE_WARNING}
The kind of media collection to return.
Should be used with `{TAG}`, `source` or `creator`+`source`"""
Must be used with `{TAG}`, `source` or `creator`+`source`"""

EXCLUDED_SOURCE_HELP_TEXT = """
A comma separated list of data sources to exclude from the search.
Expand All @@ -48,7 +56,7 @@

SOURCE = """
For default search, a comma separated list of data sources.
When the `collection` parameter is used, this parameter only accepts a single source.
When the `{collection_param}` parameter is used, this parameter only accepts a single source.
Valid values are `source_name`s from the stats endpoint: {origin}/v1/{media_path}/stats/.
"""
Expand All @@ -57,8 +65,9 @@
SOURCE if settings.SHOW_COLLECTION_DOCS else SOURCE_HELP_TEXT_COLLECTIONS_DISABLED
)

TAG_HELP_TEXT = """
_Must be used with `collection=tag`_
TAG_HELP_TEXT = f"""
{UNSTABLE_WARNING}
_Must be used with `{COLLECTION}=tag`_
Get the collection of media with a specific tag. Returns the collection of media
that has the specified tag, matching exactly and entirely.
Expand Down
79 changes: 30 additions & 49 deletions api/api/serializers/media_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from api.constants.licenses import LICENSE_GROUPS
from api.constants.media_types import MediaType
from api.constants.parameters import COLLECTION, TAG
from api.constants.search import COLLECTIONS
from api.constants.sorting import DESCENDING, RELEVANCE, SORT_DIRECTIONS, SORT_FIELDS
from api.controllers import search_controller
from api.models.media import AbstractMedia
Expand All @@ -27,6 +28,7 @@
EXCLUDED_SOURCE_HELP_TEXT,
SOURCE_HELP_TEXT,
TAG_HELP_TEXT,
UNSTABLE_WARNING,
)
from api.serializers.fields import SchemableHyperlinkedIdentityField
from api.utils.help_text import make_comma_separated_help_text
Expand Down Expand Up @@ -97,22 +99,11 @@ def validate_page_size(self, value):
return value


EXCLUDED_COLLECTION_REQUEST_FIELDS = (
[] if settings.SHOW_COLLECTION_DOCS else [COLLECTION, TAG]
)


@extend_schema_serializer(
# Hide unstable and internal fields from documentation.
# Hide internal fields from documentation.
# Also see `field_names` below.
exclude_fields=[
"unstable__sort_by",
"unstable__sort_dir",
"unstable__authority",
"unstable__authority_boost",
"unstable__include_sensitive_results",
"internal__index",
*EXCLUDED_COLLECTION_REQUEST_FIELDS,
],
)
class MediaSearchRequestSerializer(PaginatedRequestSerializer):
Expand All @@ -133,28 +124,18 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer):
"license_type",
"creator",
"tags",
# TODO: Uncomment after https://github.com/WordPress/openverse/issues/3919
# "collection",
# "tag",
COLLECTION,
TAG,
"title",
"filter_dead",
"extension",
"mature",
# Excluded unstable fields, also see `exclude_fields` above.
# "unstable__sort_by",
# "unstable__sort_dir",
# "unstable__authority",
# "unstable__authority_boost",
# "unstable__include_sensitive_results",
"unstable__sort_by",
"unstable__sort_dir",
"unstable__authority",
"unstable__authority_boost",
"unstable__include_sensitive_results",
]
# TODO: Remove after https://github.com/WordPress/openverse/issues/3919
if settings.SHOW_COLLECTION_DOCS:
field_names.extend(
[
TAG,
COLLECTION,
]
)
field_names.extend(PaginatedRequestSerializer.field_names)
"""
Keep the fields names in sync with the actual fields below as this list is
Expand Down Expand Up @@ -199,6 +180,20 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer):
required=False,
max_length=200,
)
unstable__collection = serializers.ChoiceField(
source="collection",
label="collection",
choices=COLLECTIONS,
help_text=COLLECTION_HELP_TEXT,
required=False,
)
unstable__tag = serializers.CharField(
label="tag",
source="tag",
help_text=TAG_HELP_TEXT,
required=False,
max_length=200,
)
license = serializers.CharField(
label="licenses",
help_text=make_comma_separated_help_text(LICENSE_GROUPS["all"], "licenses"),
Expand Down Expand Up @@ -236,29 +231,29 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer):
# - validators for these fields in ``MediaSearchRequestSerializer``
unstable__sort_by = serializers.ChoiceField(
source="sort_by",
help_text="The field which should be the basis for sorting results.",
help_text=f"{UNSTABLE_WARNING}The field which should be the basis for sorting results.",
choices=SORT_FIELDS,
required=False,
default=RELEVANCE,
)
unstable__sort_dir = serializers.ChoiceField(
source="sort_dir",
help_text="The direction of sorting. Cannot be applied when sorting by "
help_text=f"{UNSTABLE_WARNING}The direction of sorting. Cannot be applied when sorting by "
"`relevance`.",
choices=SORT_DIRECTIONS,
required=False,
default=DESCENDING,
)
unstable__authority = serializers.BooleanField(
label="authority",
help_text="If enabled, the search will add a boost to results that are "
help_text=f"{UNSTABLE_WARNING}If enabled, the search will add a boost to results that are "
"from authoritative sources.",
required=False,
default=False,
)
unstable__authority_boost = serializers.FloatField(
label="authority_boost",
help_text="The boost coefficient to apply to authoritative sources, "
help_text=f"{UNSTABLE_WARNING}The boost coefficient to apply to authoritative sources, "
"multiplied with the popularity boost.",
required=False,
default=1.0,
Expand All @@ -268,26 +263,11 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer):
unstable__include_sensitive_results = serializers.BooleanField(
source="include_sensitive_results",
label="include_sensitive_results",
help_text="Whether to include results considered sensitive.",
help_text=f"{UNSTABLE_WARNING}Whether to include results considered sensitive.",
required=False,
default=False,
)

unstable__tag = serializers.CharField(
label="tag",
source="tag",
help_text=TAG_HELP_TEXT,
required=False,
max_length=200,
)
unstable__collection = serializers.ChoiceField(
source="collection",
label="collection",
choices=["tag", "source", "creator"],
help_text=COLLECTION_HELP_TEXT,
required=False,
)

# The ``internal__`` prefix is used in the query params.
# If you rename these fields, update the following references:
# - ``field_names`` in ``MediaSearchRequestSerializer``
Expand Down Expand Up @@ -317,6 +297,7 @@ def __init__(self, *args, **kwargs):
variables = {
"origin": settings.CANONICAL_ORIGIN,
"media_path": media_path,
"collection_param": COLLECTION,
}

self.fields["source"].help_text = SOURCE_HELP_TEXT.format(**variables)
Expand Down

0 comments on commit 0659236

Please sign in to comment.