Skip to content

Commit

Permalink
fixup! Fix: Invalid columns in compact manifest for AnVIL (#6110)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Oct 11, 2024
1 parent efc6736 commit 25b7851
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 19 deletions.
30 changes: 13 additions & 17 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,17 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'document_id',
'source_datarepo_row_ids'
]
# Note that there is a brittle coupling that must be maintained between
# the keys here and those used in the custom field name lookup in
# `self.manifest_config`.
return {
'entity_id': 'entryId',
'bundles': {
# These field paths have a brittle coupling that must be
# maintained to the field lookups in `self.manifest_config`.
'uuid': self.special_fields.bundle_uuid,
'version': self.special_fields.bundle_version
},
'sources': {
# These field paths have a brittle coupling that must be
# maintained to the field lookups in `self.manifest_config`.
'id': self.special_fields.source_id,
'spec': self.special_fields.source_spec
},
Expand Down Expand Up @@ -199,6 +200,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
f: f'activities.{f}' for f in [
*common_fields,
'activity_id',
# This field path has a brittle coupling that must be
# maintained to the field lookup in
# `self.manifest_config`.
'activity_table',
'activity_type',
'assay_type',
Expand Down Expand Up @@ -286,17 +290,9 @@ def manifest_config(self) -> ManifestConfig:
('contents', 'files', 'version'),
]

# This is used to modify the field names from how they are specified in
# the field mapping. Keys are field paths in an ES hit, and values are
# the desired manifest column name, or None to omit the column from the
# manifest.
#
# Note that there is a brittle coupling that must be maintained between
# the keys here and those used in `self._field_mapping`. Also, the
# values should match the related field's path in a response hit from
# the `/index/files` endpoint.
#
custom_field_names = {
# Furthermore, renamed values should match the field's path in a
# response hit from the `/index/files` endpoint.
fields_to_rename_in_manifest = {
('bundles', 'uuid'): 'bundles.bundle_uuid',
('bundles', 'version'): 'bundles.bundle_version',
('sources', 'id'): 'sources.source_id',
Expand All @@ -314,16 +310,16 @@ def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath):
elif new_path in fields_to_omit_from_manifest:
result[path][path_element] = None
fields_to_omit_from_manifest.remove(new_path)
elif new_path in custom_field_names:
result[path][path_element] = custom_field_names.pop(new_path)
elif new_path in fields_to_rename_in_manifest:
result[path][path_element] = fields_to_rename_in_manifest.pop(new_path)
else:
result[path][path_element] = name_or_type
else:
assert False, (path, path_element, name_or_type)

recurse(self._field_mapping, ())
assert len(fields_to_omit_from_manifest) == 0, fields_to_omit_from_manifest
assert len(custom_field_names) == 0, custom_field_names
assert len(fields_to_rename_in_manifest) == 0, fields_to_rename_in_manifest
# The file URL is synthesized from the `uuid` and `version` fields.
# Above, we already configured these two fields to be omitted from the
# manifest since they are not informative to the user.
Expand Down
4 changes: 2 additions & 2 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ def _make_hit(self, es_hit: JSON) -> MutableJSON:
return {
'entryId': es_hit['entity_id'],
# Note that there is a brittle coupling that must be maintained
# between the `sources` and `bundles` field paths and the values in
# the custom field name lookup in `Plugin.manifest_config`.
# between the `sources` and `bundles` field paths here and the
# renamed fields in `Plugin.manifest_config`.
'sources': list(map(self._make_source, es_hit['sources'])),
'bundles': list(map(self._make_bundle, es_hit['bundles'])),
**self._make_contents(es_hit['contents'])
Expand Down

0 comments on commit 25b7851

Please sign in to comment.