From 673446791b20f64d78549f232734ac6e53da5593 Mon Sep 17 00:00:00 2001 From: Noah Dove Date: Mon, 21 Nov 2022 21:48:30 -0800 Subject: [PATCH] Index all AnVIL v4 schema columns (#4617) --- src/azul/plugins/metadata/anvil/__init__.py | 12 ++++++++++-- src/azul/plugins/metadata/anvil/indexer/transform.py | 9 +++++++++ src/azul/plugins/metadata/anvil/service/response.py | 4 +++- src/azul/plugins/repository/tdr_anvil/__init__.py | 12 +++++++++++- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index 5b9296a1d..b66d55295 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -107,9 +107,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'activity_table', 'activity_type', 'assay_category', + 'assay_type', 'data_modality', - 'document_id', - 'source_datarepo_row_ids', + 'reference_assembly', # Not in schema 'date_created', ] @@ -119,6 +119,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: *common_fields, 'biosample_id', 'anatomical_site', + 'apriori_cell_type', 'biosample_type', 'disease', 'donor_age_at_collection_unit', @@ -131,8 +132,11 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'dataset_id', 'consent_group', 'data_use_permission', + 'owner', + 'principal_investigator', 'registered_identifier', 'title', + 'data_modality', ] }, 'donors': { @@ -142,6 +146,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'organism_type', 'phenotypic_sex', 'reported_ethnicity', + 'genetic_ancestry', ] }, 'files': { @@ -151,7 +156,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'file_id', 'data_modality', 'file_format', + 'file_md5sum', 'reference_assembly', + 'file_name', # Not in schema 'crc32', 'sha256', @@ -180,6 +187,7 @@ def facets(self) -> Sequence[str]: return [ 'activities.activity_type', 'activities.assay_category', + 'activities.assay_type', 'activities.data_modality', 'biosamples.anatomical_site', 'biosamples.biosample_type', diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index 4ad660a15..3a4a8b139 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -196,7 +196,9 @@ def _activity_types(cls) -> FieldTypes: 'activity_table': null_str, 'activity_type': null_str, 'assay_category': null_str, + 'assay_type': null_str, 'data_modality': null_str, + 'reference_assembly': [null_str], # Not in schema 'date_created': null_datetime, } @@ -207,6 +209,7 @@ def _biosample_types(cls) -> FieldTypes: **cls._entity_types(), 'biosample_id': null_str, 'anatomical_site': null_str, + 'apriori_cell_type': [null_str], 'biosample_type': null_str, 'disease': null_str, 'donor_age_at_collection_unit': null_str, @@ -220,8 +223,11 @@ def _dataset_types(cls) -> FieldTypes: 'dataset_id': null_str, 'consent_group': [null_str], 'data_use_permission': [null_str], + 'owner': [null_str], + 'principal_investigator': [null_str], 'registered_identifier': [null_str], 'title': null_str, + 'data_modality': [null_str], } @classmethod @@ -232,6 +238,7 @@ def _donor_types(cls) -> FieldTypes: 'organism_type': null_str, 'phenotypic_sex': null_str, 'reported_ethnicity': null_str, + 'genetic_ancestry': [null_str], } @classmethod @@ -242,7 +249,9 @@ def _file_types(cls) -> FieldTypes: 'data_modality': [null_str], 'file_format': null_str, 'file_size': null_int, + 'file_md5sum': null_str, 'reference_assembly': [null_str], + 'file_name': null_str, # Not in schema 'version': null_str, 'uuid': null_str, diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py index 4d483c492..f804e0ca3 100644 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ b/src/azul/plugins/metadata/anvil/service/response.py @@ -182,6 +182,7 @@ def _non_pivotal_entity(self, 'activities': { 'activity_type', 'assay_category', + 'assay_type', 'data_modality' }, 'biosamples': { @@ -198,7 +199,8 @@ def _non_pivotal_entity(self, 'donors': { 'organism_type', 'phenotypic_sex', - 'reported_ethnicity' + 'reported_ethnicity', + 'genetic_ancestry' }, 'files': { 'count', diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 67b34adcc..8d3121c35 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -496,6 +496,7 @@ def convert_column(value): 'biosample': { 'biosample_id', 'anatomical_site', + 'apriori_cell_type', 'biosample_type', 'disease', 'donor_age_at_collection_unit', @@ -506,21 +507,27 @@ def convert_column(value): 'dataset_id', 'consent_group', 'data_use_permission', + 'owner', + 'principal_investigator', 'registered_identifier', - 'title' + 'title', + 'data_modality' }, 'donor': { 'donor_id', 'organism_type', 'phenotypic_sex', 'reported_ethnicity', + 'genetic_ancestry', }, 'file': { 'file_id', 'data_modality', 'file_format', 'file_size', + 'file_md5sum', 'reference_assembly', + 'file_name', 'file_ref', }, 'activity': { @@ -531,12 +538,14 @@ def convert_column(value): 'alignmentactivity_id', 'activity_type', 'data_modality', + 'reference_assembly', # Not in schema 'date_created', }, 'assayactivity': { 'assayactivity_id', 'activity_type', + 'assay_type', 'data_modality', # Not in schema 'assay_category', @@ -545,6 +554,7 @@ def convert_column(value): 'sequencingactivity': { 'sequencingactivity_id', 'activity_type', + 'assay_type', 'data_modality', } }