diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index 5892eccacb..b66d552955 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -107,7 +107,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'activity_table', 'activity_type', 'assay_category', + 'assay_type', 'data_modality', + 'reference_assembly', # Not in schema 'date_created', ] @@ -117,6 +119,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: *common_fields, 'biosample_id', 'anatomical_site', + 'apriori_cell_type', 'biosample_type', 'disease', 'donor_age_at_collection_unit', @@ -129,8 +132,11 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'dataset_id', 'consent_group', 'data_use_permission', + 'owner', + 'principal_investigator', 'registered_identifier', 'title', + 'data_modality', ] }, 'donors': { @@ -140,6 +146,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'organism_type', 'phenotypic_sex', 'reported_ethnicity', + 'genetic_ancestry', ] }, 'files': { @@ -149,7 +156,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'file_id', 'data_modality', 'file_format', + 'file_md5sum', 'reference_assembly', + 'file_name', # Not in schema 'crc32', 'sha256', @@ -178,6 +187,7 @@ def facets(self) -> Sequence[str]: return [ 'activities.activity_type', 'activities.assay_category', + 'activities.assay_type', 'activities.data_modality', 'biosamples.anatomical_site', 'biosamples.biosample_type', diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index 4ad660a15c..3a4a8b1399 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -196,7 +196,9 @@ def _activity_types(cls) -> FieldTypes: 'activity_table': null_str, 'activity_type': null_str, 'assay_category': null_str, + 'assay_type': null_str, 'data_modality': null_str, + 'reference_assembly': [null_str], # Not in schema 'date_created': null_datetime, } @@ -207,6 +209,7 @@ def _biosample_types(cls) -> FieldTypes: **cls._entity_types(), 'biosample_id': null_str, 'anatomical_site': null_str, + 'apriori_cell_type': [null_str], 'biosample_type': null_str, 'disease': null_str, 'donor_age_at_collection_unit': null_str, @@ -220,8 +223,11 @@ def _dataset_types(cls) -> FieldTypes: 'dataset_id': null_str, 'consent_group': [null_str], 'data_use_permission': [null_str], + 'owner': [null_str], + 'principal_investigator': [null_str], 'registered_identifier': [null_str], 'title': null_str, + 'data_modality': [null_str], } @classmethod @@ -232,6 +238,7 @@ def _donor_types(cls) -> FieldTypes: 'organism_type': null_str, 'phenotypic_sex': null_str, 'reported_ethnicity': null_str, + 'genetic_ancestry': [null_str], } @classmethod @@ -242,7 +249,9 @@ def _file_types(cls) -> FieldTypes: 'data_modality': [null_str], 'file_format': null_str, 'file_size': null_int, + 'file_md5sum': null_str, 'reference_assembly': [null_str], + 'file_name': null_str, # Not in schema 'version': null_str, 'uuid': null_str, diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py index 4d483c4921..f804e0ca3a 100644 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ b/src/azul/plugins/metadata/anvil/service/response.py @@ -182,6 +182,7 @@ def _non_pivotal_entity(self, 'activities': { 'activity_type', 'assay_category', + 'assay_type', 'data_modality' }, 'biosamples': { @@ -198,7 +199,8 @@ def _non_pivotal_entity(self, 'donors': { 'organism_type', 'phenotypic_sex', - 'reported_ethnicity' + 'reported_ethnicity', + 'genetic_ancestry' }, 'files': { 'count', diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 800a850f52..9e2ceccfc4 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -489,6 +489,7 @@ def convert_column(value): 'biosample': { 'biosample_id', 'anatomical_site', + 'apriori_cell_type', 'biosample_type', 'disease', 'donor_age_at_collection_unit', @@ -499,21 +500,27 @@ def convert_column(value): 'dataset_id', 'consent_group', 'data_use_permission', + 'owner', + 'principal_investigator', 'registered_identifier', - 'title' + 'title', + 'data_modality' }, 'donor': { 'donor_id', 'organism_type', 'phenotypic_sex', 'reported_ethnicity', + 'genetic_ancestry', }, 'file': { 'file_id', 'data_modality', 'file_format', 'file_size', + 'file_md5sum', 'reference_assembly', + 'file_name', 'file_ref', }, 'activity': { @@ -524,12 +531,14 @@ def convert_column(value): 'alignmentactivity_id', 'activity_type', 'data_modality', + 'reference_assembly', # Not in schema 'date_created', }, 'assayactivity': { 'assayactivity_id', 'activity_type', + 'assay_type', 'data_modality', # Not in schema 'assay_category', @@ -538,6 +547,7 @@ def convert_column(value): 'sequencingactivity': { 'sequencingactivity_id', 'activity_type', + 'assay_type', 'data_modality', } }