Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for HCA data_use_restriction (#6131) #6207

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '9.1'
'version': '9.2'
},
'tags': [
{
Expand Down
131 changes: 123 additions & 8 deletions lambdas/service/openapi.json

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'estimated_cell_count': 'projectEstimatedCellCount',
'is_tissue_atlas_project': 'isTissueAtlasProject',
'tissue_atlas': 'tissueAtlas',
'bionetwork_name': 'bionetworkName'
'bionetwork_name': 'bionetworkName',
'data_use_restriction': 'dataUseRestriction'
},
'sequencing_protocols': {
'instrument_manufacturer_model': 'instrumentManufacturerModel',
Expand Down Expand Up @@ -332,7 +333,8 @@ def facets(self) -> Sequence[str]:
'publicationTitle',
'isTissueAtlasProject',
'tissueAtlas',
'bionetworkName'
'bionetworkName',
'dataUseRestriction'
]

@property
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,8 @@ def _project_types(cls) -> FieldTypes:
'is_tissue_atlas_project': null_bool,
'tissue_atlas': [tissue_atlas],
'bionetwork_name': [null_str],
'estimated_cell_count': null_int
'estimated_cell_count': null_int,
'data_use_restriction': null_str
}

def _project(self, project: api.Project) -> MutableJSON:
Expand Down Expand Up @@ -733,7 +734,8 @@ def _project(self, project: api.Project) -> MutableJSON:
for bionetwork in project.bionetworks),
'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)),
'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks),
'estimated_cell_count': project.estimated_cell_count
'estimated_cell_count': project.estimated_cell_count,
'data_use_restriction': project.data_use_restriction
}

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def make_projects(self, entry) -> MutableJSONs:
'estimatedCellCount': project['estimated_cell_count'],
'isTissueAtlasProject': project['is_tissue_atlas_project'],
'tissueAtlas': project.get('tissue_atlas'),
'bionetworkName': project['bionetwork_name']
'bionetworkName': project['bionetwork_name'],
'dataUseRestriction': project.get('data_use_restriction')
}
if self.entity_type == 'projects':
translated_project['projectDescription'] = project.get('project_description', [])
Expand Down
2 changes: 2 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ class Project(Entity):
supplementary_links: OrderedSet[str]
estimated_cell_count: int | None
bionetworks: OrderedSet[Bionetwork]
data_use_restriction: str | None

def __init__(self, json: JSON) -> None:
super().__init__(json)
Expand All @@ -317,6 +318,7 @@ def __init__(self, json: JSON) -> None:
self.bionetworks = OrderedSet(Bionetwork(**bionetwork)
for bionetwork in content.get('hca_bionetworks', ())
if bionetwork)
self.data_use_restriction = content.get('data_use_restriction')

def _accessions(self, namespace: str) -> set[str]:
    """
    Collect the accession values of this object's accessions that belong to
    the given namespace.

    :param namespace: the namespace an accession must match exactly in order
                      to be included

    :return: the set of matching accession values; empty if none match
    """
    matching: set[str] = set()
    for entry in self.accessions:
        if entry.namespace == namespace:
            matching.add(entry.accession)
    return matching
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions test/service/data/pfb_manifest.results.json
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,9 @@
null
],
"contributors": [],
"data_use_restriction": [
null
],
"document_id": [
"6615efae-fca8-4dd2-a223-9cfcf30fe94d"
],
Expand Down Expand Up @@ -848,6 +851,9 @@
null
],
"contributors": [],
"data_use_restriction": [
null
],
"document_id": [
"e8642221-4c2c-4fd7-b926-a68bce363c88"
],
Expand Down Expand Up @@ -3282,6 +3288,9 @@
null
],
"contributors": [],
"data_use_restriction": [
null
],
"document_id": [
"90bf705c-d891-5ce2-aa54-094488b445c6"
],
Expand Down
8 changes: 8 additions & 0 deletions test/service/data/pfb_manifest.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1976,6 +1976,14 @@
"null",
"long"
]
},
{
"name": "data_use_restriction",
"namespace": "projects",
"type": {
"items": ["null", "string"],
"type": "array"
}
}
],
"name": "projects",
Expand Down
3 changes: 2 additions & 1 deletion test/service/test_index_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def assert_file_type_summaries(hit):
'tissueAtlas',
'isTissueAtlasProject',
'bionetworkName',
'estimatedCellCount'
'estimatedCellCount',
'dataUseRestriction'
}
response_json = get_response_json()
self.assertIn('hits', response_json)
Expand Down
27 changes: 26 additions & 1 deletion test/service/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def test_response_stage_files(self):
"isTissueAtlasProject": [False],
"tissueAtlas": [],
"estimatedCellCount": None,
"dataUseRestriction": [None],
}
],
"protocols": [
Expand Down Expand Up @@ -590,7 +591,8 @@ def test_response_stage_projects(self):
"bionetworkName": [None],
"tissueAtlas": [],
"isTissueAtlasProject": False,
"accessions": []
"accessions": [],
"dataUseRestriction": None,
}
],
"protocols": [
Expand Down Expand Up @@ -854,6 +856,7 @@ def test_response_stage_projects_accessions(self):
{"namespace": "insdc_project", "accession": "SRP000001"},
{"namespace": "insdc_study", "accession": "PRJNA000000"},
],
"dataUseRestriction": None,
}
],
"protocols": [
Expand Down Expand Up @@ -3506,6 +3509,28 @@ def test_projects_response(self):
}
self.assertEqual({None: 2, 'Lung': 1, 'Retina': 1, 'Blood': 1}, terms)

def test_data_use_restriction(self):
    """
    Request every exposed index, sorted by and filtered on the
    `dataUseRestriction` field with the value `NRES`, and verify that

    - the facet for that field lists exactly the terms `None` and `NRES`,

    - at least one hit is returned, and

    - the single project of every hit carries the value, as a scalar for
      the `projects` index and as a one-element list for all other
      indices.
    """
    facet = 'dataUseRestriction'
    restriction = 'NRES'
    query = dict(catalog=self.catalog,
                 sort=facet,
                 filters=json.dumps({facet: {'is': [restriction]}}))
    metadata_plugin = self.index_service.metadata_plugin(self.catalog)
    for index_name in metadata_plugin.exposed_indices:
        endpoint = self.base_url.set(path=('index', index_name), args=query)
        reply = requests.get(endpoint)
        reply.raise_for_status()
        body = reply.json()
        term_facets = body['termFacets']
        observed_terms = set()
        for bucket in term_facets[facet]['terms']:
            observed_terms.add(bucket['term'])
        self.assertEqual({None, restriction}, observed_terms)
        hits = body['hits']
        self.assertGreater(len(hits), 0)
        # Only the `projects` index is expected to expose the project
        # field as a scalar; every other index wraps it in a list.
        if index_name == 'projects':
            expected = restriction
        else:
            expected = [restriction]
        for hit in hits:
            self.assertEqual(expected, one(hit['projects'])[facet])


class TestUnpopulatedIndexResponse(IndexResponseTestCase):

Expand Down
Loading