From 055ac4cf5027ac9be32114006b03086d16423e90 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 09:53:32 +0400 Subject: [PATCH 01/15] Allow setting number of shards per media type --- .../ingestion_server/es_mapping.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index b780088a8e7..ec6cfdf0881 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -1,13 +1,28 @@ -def index_settings(table_name): +from ingestion_server.constants.media_types import ( + AUDIO_TYPE, + IMAGE_TYPE, + MODEL_3D_TYPE, + MediaType, +) + + +def index_settings(media_type: MediaType): """ Return the Elasticsearch mapping for a given table in the database. - :param table_name: The name of the table in the upstream database. - :return: + :param media_type: The name of the table in the upstream database. + :return: the settings for the ES mapping """ + + number_of_shards: dict[MediaType, int] = { + IMAGE_TYPE: 18, + AUDIO_TYPE: 1, + MODEL_3D_TYPE: 1, + } + settings = { "index": { - "number_of_shards": 18, + "number_of_shards": number_of_shards[media_type], "number_of_replicas": 0, "refresh_interval": "-1", }, @@ -154,6 +169,6 @@ def index_settings(table_name): }, } media_mappings = common_mappings.copy() - media_mappings["properties"].update(media_properties[table_name]) + media_mappings["properties"].update(media_properties[media_type]) result = {"settings": settings.copy(), "mappings": media_mappings} return result From 81912f4fe01c627b6a90c71be10f24d310a5672b Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 10:25:13 +0400 Subject: [PATCH 02/15] Simplify and organise index properties --- .../ingestion_server/es_mapping.py | 93 +++++++------------ 1 file changed, 34 insertions(+), 59 deletions(-) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index ec6cfdf0881..c0ab485785b 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -66,105 +66,80 @@ def index_settings(media_type: MediaType): }, } common_mappings = { + "dynamic": False, # extra fields are stored in ``_source`` but not indexed "properties": { "id": {"type": "long"}, - "identifier": { - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, - "type": "text", - }, + "created_on": {"type": "date"}, + "mature": {"type": "boolean"}, + # Keyword fields + "identifier": {"type": "keyword"}, + "extension": {"type": "keyword"}, + "license": {"type": "keyword"}, + "provider": {"type": "keyword"}, + "source": {"type": "keyword"}, + "filetype": {"type": "keyword"}, + "category": {"type": "keyword"}, + # Text-based fields "title": { "type": "text", + "analyzer": "custom_english", "similarity": "boolean", "fields": { "keyword": {"type": "keyword", "ignore_above": 256}, "raw": {"type": "text", "index": True}, }, - "analyzer": "custom_english", - }, - "foreign_landing_url": { - "fields": {"keyword": {"ignore_above": 256, "type": "keyword"}}, - "type": "text", }, "description": { + "type": "text", + "analyzer": "custom_english", + "similarity": "boolean", "fields": { - "keyword": {"type": "keyword", "similarity": "boolean"}, + "keyword": {"type": "keyword", "ignore_above": 256}, "raw": {"type": "text", "index": True}, }, - "type": "text", - "analyzer": "custom_english", }, "creator": { "type": "text", "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, }, - "url": { - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, - "type": "text", - }, - "extension": { - "fields": {"keyword": {"ignore_above": 8, "type": "keyword"}}, - "type": "text", - }, - "license": { - "fields": {"keyword": {"ignore_above": 256, "type": "keyword"}}, - "type": "text", - }, - "license_version": { - "type": "text", - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, - }, - "license_url": { - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, - "type": "text", - }, - "provider": { - "type": "text", - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}, - }, - "source": { - "fields": {"keyword": {"ignore_above": 256, "type": "keyword"}}, - "type": "text", + # Rank feature fields + "standardized_popularity": {"type": "rank_feature"}, + "authority_boost": {"type": "rank_feature"}, + "authority_penalty": { + "type": "rank_feature", + "positive_score_impact": False, }, - "filetype": {"type": "keyword"}, - "created_on": {"type": "date"}, + "max_boost": {"type": "rank_feature"}, + "min_boost": {"type": "rank_feature"}, + # Nested fields "tags": { "properties": { "accuracy": {"type": "float"}, + # Text-based fields "name": { "type": "text", + "analyzer": "custom_english", "fields": { "keyword": {"type": "keyword", "ignore_above": 256}, "raw": {"type": "text", "index": True}, }, - "analyzer": "custom_english", }, } }, - "mature": {"type": "boolean"}, - "standardized_popularity": {"type": "rank_feature"}, - "authority_boost": {"type": "rank_feature"}, - "authority_penalty": { - "type": "rank_feature", - "positive_score_impact": False, - }, - "max_boost": {"type": "rank_feature"}, - "min_boost": {"type": "rank_feature"}, - "category": {"type": "keyword"}, - } + }, } media_properties = { "image": { - "aspect_ratio": { - "fields": {"keyword": {"type": "keyword"}}, - "type": "text", - }, - "size": {"fields": {"keyword": {"type": "keyword"}}, "type": "text"}, + # Keyword fields + "aspect_ratio": {"type": "keyword"}, + "size": {"type": "keyword"}, }, "audio": { "bit_rate": {"type": "integer"}, "sample_rate": {"type": "integer"}, - "genres": {"fields": {"keyword": {"type": "keyword"}}, "type": "text"}, "duration": {"type": "integer"}, + # Keyword fields + "genres": {"type": "keyword"}, "length": {"type": "keyword"}, }, } From df51d1097e5603d824c9e450f764260f3ed7a3af Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 14:03:58 +0400 Subject: [PATCH 03/15] Remove fields not supported by search request serializer --- ingestion_server/ingestion_server/es_mapping.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index c0ab485785b..a2fcb08cd08 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -135,11 +135,7 @@ def index_settings(media_type: MediaType): "size": {"type": "keyword"}, }, "audio": { - "bit_rate": {"type": "integer"}, - "sample_rate": {"type": "integer"}, - "duration": {"type": "integer"}, # Keyword fields - "genres": {"type": "keyword"}, "length": {"type": "keyword"}, }, } From 2e9da514643499ed3dc4bdec9b6487715e8a2e20 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 14:05:54 +0400 Subject: [PATCH 04/15] Update field mapping in search controller --- api/api/controllers/search_controller.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/api/api/controllers/search_controller.py b/api/api/controllers/search_controller.py index f0cf402e105..a624afc8299 100644 --- a/api/api/controllers/search_controller.py +++ b/api/api/controllers/search_controller.py @@ -349,12 +349,14 @@ def search( ("extension", None), ("category", None), ("categories", "category"), + ("source", None), + ("license", None), + ("license_type", "license"), + # Audio-specific filters ("length", None), + # Image-specific filters ("aspect_ratio", None), ("size", None), - ("source", None), - ("license", "license__keyword"), - ("license_type", "license__keyword"), ] for serializer_field, es_field in filters: if serializer_field in search_params.data: From 1d276913f96db981885c845315ce5879516751bc Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 14:12:19 +0400 Subject: [PATCH 05/15] Remove `.keyword` from source --- api/api/controllers/search_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/api/controllers/search_controller.py b/api/api/controllers/search_controller.py index a624afc8299..3efb8c7decf 100644 --- a/api/api/controllers/search_controller.py +++ b/api/api/controllers/search_controller.py @@ -561,7 +561,7 @@ def get_sources(index): aggs = { "unique_sources": { "terms": { - "field": "source.keyword", + "field": "source", "size": size, "order": {"_key": "desc"}, } From 560fa69855df5ec7bb87684178737185b03ab1a4 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 14:23:22 +0400 Subject: [PATCH 06/15] Remove `.keyword` from identifier --- api/api/utils/search_context.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/api/utils/search_context.py b/api/api/utils/search_context.py index d1505c476d8..9beccee6076 100644 --- a/api/api/utils/search_context.py +++ b/api/api/utils/search_context.py @@ -35,12 +35,11 @@ def build( # Use `identifier` rather than the document `id` due to # `id` instability between refreshes: # https://github.com/WordPress/openverse/issues/2306 - # `identifier` is mapped as `text` which will match fuzzily. - # Use `identifier.keyword` to match _exactly_ + # `identifier` is mapped as `keyword` which will match exactly. # cf: https://github.com/WordPress/openverse/issues/2154 Q( "terms", - **{"identifier.keyword": all_result_identifiers}, + **{"identifier": all_result_identifiers}, ) ) From 4aa9fb197fcaac73870d942417091ab920c68c65 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 15:31:35 +0400 Subject: [PATCH 07/15] Remove keyword field from description --- ingestion_server/ingestion_server/es_mapping.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index a2fcb08cd08..13f25c29541 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -93,10 +93,7 @@ def index_settings(media_type: MediaType): "type": "text", "analyzer": "custom_english", "similarity": "boolean", - "fields": { - "keyword": {"type": "keyword", "ignore_above": 256}, - "raw": {"type": "text", "index": True}, - }, + "fields": {"raw": {"type": "text", "index": True}}, }, "creator": { "type": "text", From 997324fc78291653d8b2dff31b822c40ec924ab2 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Thu, 5 Oct 2023 15:35:12 +0400 Subject: [PATCH 08/15] Delete unwanted entry from `number_of_shards` --- ingestion_server/ingestion_server/es_mapping.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index 13f25c29541..d7ec3272f7b 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -1,9 +1,4 @@ -from ingestion_server.constants.media_types import ( - AUDIO_TYPE, - IMAGE_TYPE, - MODEL_3D_TYPE, - MediaType, -) +from ingestion_server.constants.media_types import AUDIO_TYPE, IMAGE_TYPE, MediaType def index_settings(media_type: MediaType): @@ -17,7 +12,6 @@ def index_settings(media_type: MediaType): number_of_shards: dict[MediaType, int] = { IMAGE_TYPE: 18, AUDIO_TYPE: 1, - MODEL_3D_TYPE: 1, } settings = { From b0c0df97c5e27a6f11b4ec0164ce9e0b30fbc591 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 6 Oct 2023 12:48:47 +0400 Subject: [PATCH 09/15] Remove extraneous fields --- ingestion_server/ingestion_server/elasticsearch_models.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index 0397830f639..83a0513f06b 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -245,7 +245,6 @@ def database_row_to_elasticsearch_doc(row, schema): popularity = attrs["standardized_popularity"] return Image( - thumbnail=row[schema["thumbnail"]], aspect_ratio=aspect_ratio, extension=extension, size=size, @@ -330,10 +329,6 @@ def database_row_to_elasticsearch_doc(row, schema): length = Audio.get_length(row[schema["duration"]]) return Audio( - bit_rate=row[schema["bit_rate"]], - sample_rate=row[schema["sample_rate"]], - genres=row[schema["genres"]], - duration=row[schema["duration"]], length=length, filetype=filetype, extension=extension, From 020350226db682114ff32ec7a6c914aebd9282a4 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 6 Oct 2023 12:54:29 +0400 Subject: [PATCH 10/15] Move boosts to parent `Media` class --- .../ingestion_server/elasticsearch_models.py | 29 +++++-------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index 83a0513f06b..16279e5d6a8 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -97,6 +97,9 @@ def get_instance_attrs(row, schema): # cleanup tests in CI: test/unit_tests/test_cleanup.py category = row[schema["category"]] if "category" in schema else None + provider = row[schema["provider"]] + authority_boost = Media.get_authority_boost(meta, provider) + return { "_id": row[schema["id"]], "id": row[schema["id"]], @@ -110,13 +113,16 @@ def get_instance_attrs(row, schema): "license": row[schema["license"]].lower(), "license_version": row[schema["license_version"]], "license_url": Media.get_license_url(meta), - "provider": row[schema["provider"]], + "provider": provider, "source": row[schema["source"]], "category": category, "created_on": row[schema["created_on"]], "tags": Media.parse_detailed_tags(row[schema["tags"]]), "mature": Media.get_maturity(meta, row[schema["mature"]]), "standardized_popularity": popularity, + "authority_boost": authority_boost, + "max_boost": max(popularity or 1, authority_boost or 1), + "min_boost": min(popularity or 1, authority_boost or 1), } @staticmethod @@ -230,27 +236,16 @@ class Index: @staticmethod def database_row_to_elasticsearch_doc(row, schema): extension = Image.get_extension(row[schema["url"]]) - height = row[schema["height"]] width = row[schema["width"]] aspect_ratio = Image.get_aspect_ratio(height, width) size = Image.get_size(height, width) - - meta = row[schema["meta_data"]] - provider = row[schema["provider"]] - authority_boost = Image.get_authority_boost(meta, provider) - attrs = Image.get_instance_attrs(row, schema) - attrs["category"] = attrs["category"] - popularity = attrs["standardized_popularity"] return Image( aspect_ratio=aspect_ratio, extension=extension, size=size, - authority_boost=authority_boost, - max_boost=max(popularity or 1, authority_boost or 1), - min_boost=min(popularity or 1, authority_boost or 1), **attrs, ) @@ -318,23 +313,13 @@ def database_row_to_elasticsearch_doc(row, schema): alt_files = row[schema["alt_files"]] filetype = row[schema["filetype"]] extension = Audio.get_extensions(filetype, alt_files) - - meta = row[schema["meta_data"]] - provider = row[schema["provider"]] - authority_boost = Audio.get_authority_boost(meta, provider) - attrs = Audio.get_instance_attrs(row, schema) - popularity = attrs["standardized_popularity"] - length = Audio.get_length(row[schema["duration"]]) return Audio( length=length, filetype=filetype, extension=extension, - authority_boost=authority_boost, - max_boost=max(popularity or 1, authority_boost or 1), - min_boost=min(popularity or 1, authority_boost or 1), **attrs, ) From 3b5d1d2a8202c2c4a574cb1450e3131227bdba63 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 6 Oct 2023 14:38:38 +0400 Subject: [PATCH 11/15] Remove unused fields and sort as per `es_mapping.py` --- .../ingestion_server/elasticsearch_models.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index 16279e5d6a8..4e3b33e24bc 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -100,29 +100,31 @@ def get_instance_attrs(row, schema): provider = row[schema["provider"]] authority_boost = Media.get_authority_boost(meta, provider) + # This matches the order of fields defined in ``es_mapping.py``. return { "_id": row[schema["id"]], "id": row[schema["id"]], + "created_on": row[schema["created_on"]], + "mature": Media.get_maturity(meta, row[schema["mature"]]), + # Keyword fields "identifier": row[schema["identifier"]], - "title": row[schema["title"]], - "foreign_landing_url": row[schema["foreign_landing_url"]], - "description": Media.parse_description(meta), - "creator": row[schema["creator"]], - "creator_url": row[schema["creator_url"]], - "url": row[schema["url"]], "license": row[schema["license"]].lower(), - "license_version": row[schema["license_version"]], - "license_url": Media.get_license_url(meta), "provider": provider, "source": row[schema["source"]], "category": category, - "created_on": row[schema["created_on"]], - "tags": Media.parse_detailed_tags(row[schema["tags"]]), - "mature": Media.get_maturity(meta, row[schema["mature"]]), + # Text-based fields + "title": row[schema["title"]], + "description": Media.parse_description(meta), + "creator": row[schema["creator"]], + # Rank feature fields "standardized_popularity": popularity, "authority_boost": authority_boost, "max_boost": max(popularity or 1, authority_boost or 1), "min_boost": min(popularity or 1, authority_boost or 1), + # Nested fields + "tags": Media.parse_detailed_tags(row[schema["tags"]]), + # Extra fields, not indexed + "url": row[schema["url"]], } @staticmethod From 618604d93873eb712d7fa0b359d5b2cb23696385 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Sat, 7 Oct 2023 09:52:37 +0400 Subject: [PATCH 12/15] Return subfields to avoid API changes --- api/api/controllers/search_controller.py | 10 ++++------ api/api/utils/search_context.py | 5 +++-- .../ingestion_server/es_mapping.py | 18 +++++++++++++++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/api/api/controllers/search_controller.py b/api/api/controllers/search_controller.py index 3efb8c7decf..f0cf402e105 100644 --- a/api/api/controllers/search_controller.py +++ b/api/api/controllers/search_controller.py @@ -349,14 +349,12 @@ def search( ("extension", None), ("category", None), ("categories", "category"), - ("source", None), - ("license", None), - ("license_type", "license"), - # Audio-specific filters ("length", None), - # Image-specific filters ("aspect_ratio", None), ("size", None), + ("source", None), + ("license", "license__keyword"), + ("license_type", "license__keyword"), ] for serializer_field, es_field in filters: if serializer_field in search_params.data: @@ -561,7 +559,7 @@ def get_sources(index): aggs = { "unique_sources": { "terms": { - "field": "source", + "field": "source.keyword", "size": size, "order": {"_key": "desc"}, } diff --git a/api/api/utils/search_context.py b/api/api/utils/search_context.py index 9beccee6076..d1505c476d8 100644 --- a/api/api/utils/search_context.py +++ b/api/api/utils/search_context.py @@ -35,11 +35,12 @@ def build( # Use `identifier` rather than the document `id` due to # `id` instability between refreshes: # https://github.com/WordPress/openverse/issues/2306 - # `identifier` is mapped as `keyword` which will match exactly. + # `identifier` is mapped as `text` which will match fuzzily. + # Use `identifier.keyword` to match _exactly_ # cf: https://github.com/WordPress/openverse/issues/2154 Q( "terms", - **{"identifier": all_result_identifiers}, + **{"identifier.keyword": all_result_identifiers}, ) ) diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index d7ec3272f7b..c70e4c1a4fd 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -66,11 +66,23 @@ def index_settings(media_type: MediaType): "created_on": {"type": "date"}, "mature": {"type": "boolean"}, # Keyword fields - "identifier": {"type": "keyword"}, + "identifier": { + # TODO: Remove subfield when API is updated + "fields": {"keyword": {"type": "keyword"}}, + "type": "keyword", + }, "extension": {"type": "keyword"}, - "license": {"type": "keyword"}, + "license": { + # TODO: Remove subfield when API is updated + "fields": {"keyword": {"type": "keyword"}}, + "type": "keyword", + }, "provider": {"type": "keyword"}, - "source": {"type": "keyword"}, + "source": { + # TODO: Remove subfield when API is updated + "fields": {"keyword": {"type": "keyword"}}, + "type": "keyword", + }, "filetype": {"type": "keyword"}, "category": {"type": "keyword"}, # Text-based fields From c67f3ee749cbf9b7c3b28bca943626585960e77f Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 9 Oct 2023 09:33:48 +0400 Subject: [PATCH 13/15] Re-add extra fields that are not indexed --- .../ingestion_server/elasticsearch_models.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index 4e3b33e24bc..32fc8491cd6 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -125,6 +125,10 @@ def get_instance_attrs(row, schema): "tags": Media.parse_detailed_tags(row[schema["tags"]]), # Extra fields, not indexed "url": row[schema["url"]], + "foreign_landing_url": row[schema["foreign_landing_url"]], + "creator_url": row[schema["creator_url"]], + "license_version": row[schema["license_version"]], + "license_url": Media.get_license_url(meta), } @staticmethod @@ -248,6 +252,8 @@ def database_row_to_elasticsearch_doc(row, schema): aspect_ratio=aspect_ratio, extension=extension, size=size, + # Extra fields, not indexed + thumbnail=row[schema["thumbnail"]], **attrs, ) @@ -322,6 +328,11 @@ def database_row_to_elasticsearch_doc(row, schema): length=length, filetype=filetype, extension=extension, + # Extra fields, not indexed + bit_rate=row[schema["bit_rate"]], + sample_rate=row[schema["sample_rate"]], + genres=row[schema["genres"]], + duration=row[schema["duration"]], **attrs, ) From ba3efc359246d3527c0ddae9c1a28c875b059443 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 9 Oct 2023 18:09:33 +0000 Subject: [PATCH 14/15] Use blank array instead of `None` for tags --- ingestion_server/ingestion_server/elasticsearch_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index 32fc8491cd6..c758d847ee6 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -199,7 +199,7 @@ def get_popularity(raw): @staticmethod def parse_detailed_tags(json_tags): if not json_tags: - return None + return [] parsed_tags = [] for tag in json_tags: if "name" in tag: From d52e73ffa347ed194f93cab0f0f881dac75daf91 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 13 Oct 2023 12:56:49 +0000 Subject: [PATCH 15/15] Revert to `None` when no tags --- ingestion_server/ingestion_server/elasticsearch_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index c758d847ee6..32fc8491cd6 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -199,7 +199,7 @@ def get_popularity(raw): @staticmethod def parse_detailed_tags(json_tags): if not json_tags: - return [] + return None parsed_tags = [] for tag in json_tags: if "name" in tag: