Skip to content

Commit

Permalink
Replace file names with entity references in HCA bundle metadata
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc committed Aug 30, 2024
1 parent 18b8dd5 commit 846dd8a
Show file tree
Hide file tree
Showing 77 changed files with 5,383 additions and 5,364 deletions.
6 changes: 3 additions & 3 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,9 @@ def add_entity(self,
checksums=Checksums.from_json(descriptor),
drs_uri=self._parse_drs_uri(row['file_id'], descriptor))
content = row['content']
self.metadata[entity_key] = (json.loads(content)
if isinstance(content, str)
else content)
self.metadata[str(entity)] = (json.loads(content)
if isinstance(content, str)
else content)

metadata_columns: ClassVar[set[str]] = {
'version',
Expand Down
16 changes: 9 additions & 7 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
OrderedSet,
dict_merge,
)

from azul.indexer.document import (
EntityReference,
)
from azul.types import (
JSON,
MutableJSON,
Expand Down Expand Up @@ -920,13 +924,11 @@ def __init__(self,
self.stitched = frozenset(map(UUID4, stitched_entity_ids))

json_by_core_cls: MutableMapping[type[E], list[JSON]] = defaultdict(list)
for file_name, json in metadata.items():
assert file_name.endswith('.json')
schema_name, _, suffix = file_name[:-5].rpartition('_')
if schema_name and suffix.isdigit():
entity_cls = entity_types[schema_name]
core_cls = core_types[entity_cls]
json_by_core_cls[core_cls].append(json)
for key, json in metadata.items():
schema_name = EntityReference.parse(key).entity_type
entity_cls = entity_types[schema_name]
core_cls = core_types[entity_cls]
json_by_core_cls[core_cls].append(json)

def from_json_vx(core_cls: type[E],
**kwargs
Expand Down
6 changes: 5 additions & 1 deletion src/humancellatlas/data/metadata/helpers/staging_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
reject,
require,
)
from azul.indexer.document import (
EntityReference,
)
from azul.types import (
JSON,
MutableJSON,
Expand Down Expand Up @@ -158,7 +161,8 @@ def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, Mutable
json_file_name = f'{entity_type}_{i}.json'
metadata_file = self.metadata[entity_id]
json_content = metadata_file.content
metadata[json_file_name] = json_content
key = str(EntityReference(entity_type=entity_type, entity_id=entity_id))
metadata[key] = json_content
file_manifest = {
'content-type': 'application/json;',
'crc32c': '0' * 8,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
}
],
"metadata": {
"cell_suspension_0.json": {
"cell_suspension/7b53bae2-2424-44c0-9d80-ad72e8bca136": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down Expand Up @@ -174,7 +174,7 @@
"update_date": "2018-09-04T12:27:19.625Z"
}
},
"dissociation_protocol_0.json": {
"dissociation_protocol/eebf404f-4fbb-41b0-a9c6-81586f729599": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/biomaterial_collection/5.0.3/dissociation_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -192,7 +192,7 @@
"update_date": "2018-09-04T12:26:56.433Z"
}
},
"donor_organism_0.json": {
"donor_organism/628e8b1d-a1ce-4dee-b15a-3fd33290eafe": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/10.1.1/donor_organism",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down Expand Up @@ -246,7 +246,7 @@
"update_date": "2018-09-04T12:27:10.175Z"
}
},
"library_preparation_protocol_0.json": {
"library_preparation_protocol/e4024c4a-dbce-4bda-bed8-21414091e7ce": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/sequencing/4.3.2/library_preparation_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand Down Expand Up @@ -281,7 +281,7 @@
"update_date": "2018-09-04T12:27:50.909Z"
}
},
"process_0.json": {
"process/98442f49-9afb-491e-8347-f891f39d8d70": {
"process_core": {
"process_id": "process_id_128"
},
Expand All @@ -293,7 +293,7 @@
"update_date": "2018-09-04T12:28:00.426Z"
}
},
"process_1.json": {
"process/d691bda4-ed01-48b6-a4ea-65b70f6a3946": {
"process_core": {
"process_id": "process_id_255"
},
Expand All @@ -305,7 +305,7 @@
"update_date": "2018-09-04T12:28:06.541Z"
}
},
"process_2.json": {
"process/5aa4645b-2802-4140-9b15-d1008338b1c9": {
"process_core": {
"process_id": "process_id_1"
},
Expand All @@ -317,7 +317,7 @@
"update_date": "2018-09-04T12:27:51.508Z"
}
},
"project_0.json": {
"project/617eb7c1-a3bc-4dd3-9a2a-50a77c998e22": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/project/9.0.2/project",
"schema_type": "project",
"project_core": {
Expand Down Expand Up @@ -461,7 +461,7 @@
"update_date": "2018-09-04T12:27:09.930Z"
}
},
"sequence_file_0.json": {
"sequence_file/36d7f891-8a43-4ae4-8472-a34dcb2be643": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/file/6.5.2/sequence_file",
"schema_type": "file",
"file_core": {
Expand All @@ -478,7 +478,7 @@
"update_date": "2018-09-04T12:27:39.827Z"
}
},
"sequencing_protocol_0.json": {
"sequencing_protocol/aaa08845-5150-4a4f-9c44-5ea22add1fc3": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/sequencing/9.0.2/sequencing_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -502,7 +502,7 @@
"update_date": "2018-09-04T12:27:50.911Z"
}
},
"specimen_from_organism_0.json": {
"specimen_from_organism/6228558b-436a-46c9-9cd3-ea9b5c123070": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/6.3.1/specimen_from_organism",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
}
],
"metadata": {
"cell_suspension_0.json": {
"cell_suspension/cc0d9bf0-6ad5-4489-994b-db26ff761c5a": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down Expand Up @@ -217,7 +217,7 @@
"update_date": "2018-09-05T09:53:09.415Z"
}
},
"collection_protocol_0.json": {
"collection_protocol/f8d9778f-0f72-4432-a929-1096fd9ca2f4": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/biomaterial_collection/8.2.6/collection_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -236,7 +236,7 @@
"update_date": "2018-09-05T09:52:05.472Z"
}
},
"dissociation_protocol_0.json": {
"dissociation_protocol/a934e15c-e1ce-423b-b554-9203d4e93f41": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/biomaterial_collection/5.0.3/dissociation_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -255,7 +255,7 @@
"update_date": "2018-09-05T09:51:59.418Z"
}
},
"donor_organism_0.json": {
"donor_organism/25e083e3-d747-4295-86c8-c7ddc4b975be": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/10.1.1/donor_organism",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down Expand Up @@ -298,7 +298,7 @@
"update_date": "2018-09-05T09:50:15.914Z"
}
},
"enrichment_protocol_0.json": {
"enrichment_protocol/80921b90-fe8d-45e1-a5e5-4fdb55f9a3fa": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/biomaterial_collection/2.2.5/enrichment_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -317,7 +317,7 @@
"update_date": "2018-09-05T09:52:05.683Z"
}
},
"enrichment_protocol_1.json": {
"enrichment_protocol/77c71448-fb32-472f-9d44-ea9a42867a41": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/biomaterial_collection/2.2.5/enrichment_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -337,7 +337,7 @@
"update_date": "2018-09-05T09:51:58.823Z"
}
},
"library_preparation_protocol_0.json": {
"library_preparation_protocol/0ea30fa0-7183-4ffc-a4b8-537c6de32e65": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/sequencing/4.3.2/library_preparation_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand Down Expand Up @@ -372,7 +372,7 @@
"update_date": "2018-09-05T09:52:00.867Z"
}
},
"process_0.json": {
"process/bcfcb3d7-674a-429f-8bf9-347a0f222db2": {
"insdc_experiment": {
"insdc_experiment": "SRX3364233"
},
Expand All @@ -387,7 +387,7 @@
"update_date": "2018-09-05T09:56:34.501Z"
}
},
"process_1.json": {
"process/f8e3c0b7-ed2a-464d-bff4-b80fac2a3849": {
"process_core": {
"process_id": "process_id_29"
},
Expand All @@ -399,7 +399,7 @@
"update_date": "2018-09-05T09:55:08.938Z"
}
},
"process_2.json": {
"process/4e504efb-a65b-4fe5-97f5-8148f6a8ed4d": {
"process_core": {
"process_location": "Sri Lanka",
"process_id": "process_id_16"
Expand All @@ -412,7 +412,7 @@
"update_date": "2018-09-05T09:55:08.214Z"
}
},
"project_0.json": {
"project/ee5b3a17-4128-40ff-88f4-44903ef1ab54": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/project/9.0.2/project",
"schema_type": "project",
"project_core": {
Expand Down Expand Up @@ -497,7 +497,7 @@
"update_date": "2018-09-05T09:50:15.608Z"
}
},
"sequence_file_0.json": {
"sequence_file/a5806f2e-3f85-486a-9015-e02e5c805285": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/file/6.5.2/sequence_file",
"schema_type": "file",
"file_core": {
Expand All @@ -515,7 +515,7 @@
"update_date": "2018-09-05T09:54:36.578Z"
}
},
"sequencing_protocol_0.json": {
"sequencing_protocol/2d0f835b-ced1-4cf1-a053-0cf109fdefeb": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/protocol/sequencing/9.0.2/sequencing_protocol",
"schema_type": "protocol",
"protocol_core": {
Expand All @@ -538,7 +538,7 @@
"update_date": "2018-09-05T09:51:58.194Z"
}
},
"specimen_from_organism_0.json": {
"specimen_from_organism/ff63b5d7-e702-40da-bbe9-619e52131b63": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/6.3.1/specimen_from_organism",
"schema_type": "biomaterial",
"biomaterial_core": {
Expand Down
Loading

0 comments on commit 846dd8a

Please sign in to comment.