From 41b1d03badfc0bd49b50a933a7d3738cd94b78c1 Mon Sep 17 00:00:00 2001 From: Daniel Sotirhos Date: Thu, 12 Sep 2024 15:56:30 -0700 Subject: [PATCH] Refactor the AnVIL plugin's manifest config --- src/azul/plugins/metadata/anvil/__init__.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index 77fea54c36..393f4a64fa 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -277,6 +277,14 @@ def facets(self) -> Sequence[str]: def manifest_config(self) -> ManifestConfig: result = defaultdict(dict) + # This is used to modify the field names from how they are specified in + # the field mapping. Keys are field paths in an ES hit, and values are + # the desired manifest column name, or None to omit from the manifest. + custom_field_names = { + ('contents', 'files', 'uuid'): None, + ('contents', 'files', 'version'): None, + } + def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath): for path_element, name_or_type in mapping.items(): new_path = (*path, path_element) @@ -285,20 +293,17 @@ def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath): elif isinstance(name_or_type, str): if new_path == ('entity_id',): pass - elif new_path == ('contents', 'files', 'uuid'): - # Request the injection of a file URL … - result[path]['file_url'] = 'files.file_url' - # … but suppress the columns for the fields … - result[path][path_element] = None - elif new_path == ('contents', 'files', 'version'): - # … only used by that injection. - result[path][path_element] = None + elif new_path in custom_field_names: + result[path][path_element] = custom_field_names[new_path] else: result[path][path_element] = name_or_type else: assert False, (path, path_element, name_or_type) recurse(self._field_mapping, ()) + # Add a file URL column. 
It isn't an indexed field, so it isn't in the field + # mapping; rather, it is added dynamically during manifest generation. + result[('contents', 'files')]['file_url'] = 'files.file_url' return result def verbatim_pfb_schema(self,