Skip to content

Commit

Permalink
Index all AnVIL v4 schema columns (#4617)
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc committed Dec 6, 2022
1 parent 8157dd3 commit a49cd09
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 4 deletions.
12 changes: 10 additions & 2 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'activity_table',
'activity_type',
'assay_category',
+ 'assay_type',
  'data_modality',
- 'document_id',
- 'source_datarepo_row_ids',
+ 'reference_assembly',
# Not in schema
'date_created',
]
Expand All @@ -119,6 +119,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
*common_fields,
'biosample_id',
'anatomical_site',
'apriori_cell_type',
'biosample_type',
'disease',
'donor_age_at_collection_unit',
Expand All @@ -131,8 +132,11 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'dataset_id',
'consent_group',
'data_use_permission',
'owner',
'principal_investigator',
'registered_identifier',
'title',
'data_modality',
]
},
'donors': {
Expand All @@ -142,6 +146,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'organism_type',
'phenotypic_sex',
'reported_ethnicity',
'genetic_ancestry',
]
},
'files': {
Expand All @@ -151,7 +156,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'file_id',
'data_modality',
'file_format',
'file_md5sum',
'reference_assembly',
'file_name',
# Not in schema
'crc32',
'sha256',
Expand Down Expand Up @@ -180,6 +187,7 @@ def facets(self) -> Sequence[str]:
return [
'activities.activity_type',
'activities.assay_category',
'activities.assay_type',
'activities.data_modality',
'biosamples.anatomical_site',
'biosamples.biosample_type',
Expand Down
9 changes: 9 additions & 0 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ def _activity_types(cls) -> FieldTypes:
'activity_table': null_str,
'activity_type': null_str,
'assay_category': null_str,
'assay_type': null_str,
'data_modality': null_str,
'reference_assembly': [null_str],
# Not in schema
'date_created': null_datetime,
}
Expand All @@ -207,6 +209,7 @@ def _biosample_types(cls) -> FieldTypes:
**cls._entity_types(),
'biosample_id': null_str,
'anatomical_site': null_str,
'apriori_cell_type': [null_str],
'biosample_type': null_str,
'disease': null_str,
'donor_age_at_collection_unit': null_str,
Expand All @@ -220,8 +223,11 @@ def _dataset_types(cls) -> FieldTypes:
'dataset_id': null_str,
'consent_group': [null_str],
'data_use_permission': [null_str],
'owner': [null_str],
'principal_investigator': [null_str],
'registered_identifier': [null_str],
'title': null_str,
'data_modality': [null_str],
}

@classmethod
Expand All @@ -232,6 +238,7 @@ def _donor_types(cls) -> FieldTypes:
'organism_type': null_str,
'phenotypic_sex': null_str,
'reported_ethnicity': null_str,
'genetic_ancestry': [null_str],
}

@classmethod
Expand All @@ -242,7 +249,9 @@ def _file_types(cls) -> FieldTypes:
'data_modality': [null_str],
'file_format': null_str,
'file_size': null_int,
'file_md5sum': null_str,
'reference_assembly': [null_str],
'file_name': null_str,
# Not in schema
'version': null_str,
'uuid': null_str,
Expand Down
4 changes: 3 additions & 1 deletion src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _non_pivotal_entity(self,
'activities': {
'activity_type',
'assay_category',
'assay_type',
'data_modality'
},
'biosamples': {
Expand All @@ -198,7 +199,8 @@ def _non_pivotal_entity(self,
'donors': {
'organism_type',
'phenotypic_sex',
- 'reported_ethnicity'
+ 'reported_ethnicity',
+ 'genetic_ancestry'
},
'files': {
'count',
Expand Down
12 changes: 11 additions & 1 deletion src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ def convert_column(value):
'biosample': {
'biosample_id',
'anatomical_site',
'apriori_cell_type',
'biosample_type',
'disease',
'donor_age_at_collection_unit',
Expand All @@ -506,21 +507,27 @@ def convert_column(value):
'dataset_id',
'consent_group',
'data_use_permission',
'owner',
'principal_investigator',
'registered_identifier',
- 'title'
+ 'title',
+ 'data_modality'
},
'donor': {
'donor_id',
'organism_type',
'phenotypic_sex',
'reported_ethnicity',
'genetic_ancestry',
},
'file': {
'file_id',
'data_modality',
'file_format',
'file_size',
'file_md5sum',
'reference_assembly',
'file_name',
'file_ref',
},
'activity': {
Expand All @@ -531,12 +538,14 @@ def convert_column(value):
'alignmentactivity_id',
'activity_type',
'data_modality',
'reference_assembly',
# Not in schema
'date_created',
},
'assayactivity': {
'assayactivity_id',
'activity_type',
'assay_type',
'data_modality',
# Not in schema
'assay_category',
Expand All @@ -545,6 +554,7 @@ def convert_column(value):
'sequencingactivity': {
'sequencingactivity_id',
'activity_type',
'assay_type',
'data_modality',
}
}
Expand Down

0 comments on commit a49cd09

Please sign in to comment.