Skip to content

Commit

Permalink
Order AnVIL fields consistently with schema document
Browse files: browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Dec 6, 2022
1 parent 1ed0473 commit dad7322
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 29 deletions.
32 changes: 18 additions & 14 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,6 +85,10 @@ def mapping(self) -> MutableJSON:

@property
def _field_mapping(self) -> MetadataPlugin._FieldMapping:
common_fields = [
'document_id',
'source_datarepo_row_ids'
]
return {
'entity_id': 'entryId',
'bundles': {
Expand All @@ -98,58 +102,57 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'contents': {
'activities': {
f: f'activities.{f}' for f in [
*common_fields,
'activity_id',
'activity_table',
'activity_type',
'assay_category',
'data_modality',
'date_created',
'document_id',
'source_datarepo_row_ids',
# Not in schema
'date_created',
]
},
'biosamples': {
f: f'biosamples.{f}' for f in [
'anatomical_site',
*common_fields,
'biosample_id',
'anatomical_site',
'biosample_type',
'document_id',
'donor_age_at_collection_age_range',
'donor_age_at_collection_unit',
'disease',
'source_datarepo_row_ids',
'donor_age_at_collection_unit',
'donor_age_at_collection_age_range',
]
},
'datasets': {
f: f'datasets.{f}' for f in [
*common_fields,
'dataset_id',
'consent_group',
'data_use_permission',
'document_id',
'registered_identifier',
'source_datarepo_row_ids',
'title',
]
},
'donors': {
f: f'donors.{f}' for f in [
'document_id',
*common_fields,
'donor_id',
'organism_type',
'phenotypic_sex',
'reported_ethnicity',
'source_datarepo_row_ids',
]
},
'files': {
**{
f: f'files.{f}' for f in [
*common_fields,
'file_id',
'data_modality',
'document_id',
'file_format',
'file_id',
'reference_assembly',
'source_datarepo_row_ids',
# Not in schema
'crc32',
'sha256',
'drs_path',
Expand All @@ -159,9 +162,10 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
# These field names are hard-coded in the implementation of
# the repository service/controller.
**{
'byte_size': 'size',
# Not in schema
'version': 'fileVersion',
'uuid': 'fileId',
'byte_size': 'size'
}
}
}
Expand Down
16 changes: 9 additions & 7 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -197,19 +197,20 @@ def _activity_types(cls) -> FieldTypes:
'activity_type': null_str,
'assay_category': null_str,
'data_modality': null_str,
# Not in schema
'date_created': null_datetime,
}

@classmethod
def _biosample_types(cls) -> FieldTypes:
return {
**cls._entity_types(),
'anatomical_site': null_str,
'biosample_id': null_str,
'anatomical_site': null_str,
'biosample_type': null_str,
'donor_age_at_collection_age_range': pass_thru_json,
'donor_age_at_collection_unit': null_str,
'disease': null_str,
'donor_age_at_collection_unit': null_str,
'donor_age_at_collection_age_range': pass_thru_json,
}

@classmethod
Expand Down Expand Up @@ -237,15 +238,16 @@ def _donor_types(cls) -> FieldTypes:
def _file_types(cls) -> FieldTypes:
return {
**cls._entity_types(),
'version': null_str,
'uuid': null_str,
'file_id': null_str,
'data_modality': [null_str],
'file_format': null_str,
'file_id': null_str,
'byte_size': null_int,
'reference_assembly': [null_str],
# Not in schema
'version': null_str,
'uuid': null_str,
'size': null_int,
'name': null_str,
'reference_assembly': [null_str],
'crc32': null_str,
'sha256': null_str,
'drs_path': null_str
Expand Down
4 changes: 2 additions & 2 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -187,9 +187,9 @@ def _non_pivotal_entity(self,
'biosamples': {
'anatomical_site',
'biosample_type',
'donor_age_at_collection_age_range',
'donor_age_at_collection_unit',
'disease',
'donor_age_at_collection_unit',
'donor_age_at_collection_age_range',
},
'datasets': {
'dataset_id',
Expand Down
14 changes: 8 additions & 6 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -490,12 +490,12 @@ def convert_column(value):
indexed_columns_by_entity_type = {
'biosample': {
'biosample_id',
'biosample_type',
'anatomical_site',
'biosample_type',
'disease',
'donor_age_at_collection_unit',
'donor_age_at_collection_lower_bound',
'donor_age_at_collection_upper_bound',
'donor_age_at_collection_unit',
'disease',
},
'dataset': {
'dataset_id',
Expand All @@ -512,11 +512,11 @@ def convert_column(value):
},
'file': {
'file_id',
'file_ref',
'byte_size',
'data_modality',
'file_format',
'byte_size',
'reference_assembly',
'file_ref',
},
'activity': {
'activity_id',
Expand All @@ -526,13 +526,15 @@ def convert_column(value):
'alignmentactivity_id',
'activity_type',
'data_modality',
# Not in schema
'date_created',
},
'assayactivity': {
'assayactivity_id',
'activity_type',
'assay_category',
'data_modality',
# Not in schema
'assay_category',
'date_created',
},
'sequencingactivity': {
Expand Down

0 comments on commit dad7322

Please sign in to comment.