Skip to content

Commit

Permalink
[p] Convert HCABundle.manifest to dictionary (partial #6299)
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc committed Aug 23, 2024
1 parent f10c22e commit 7cf9eb2
Show file tree
Hide file tree
Showing 77 changed files with 2,870 additions and 2,849 deletions.
5 changes: 2 additions & 3 deletions src/azul/plugins/metadata/hca/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@
from azul.types import (
JSON,
MutableJSON,
MutableJSONs,
)

log = logging.getLogger(__name__)


@attrs.define(kw_only=True)
class HCABundle(Bundle[BUNDLE_FQID], ABC):
manifest: MutableJSONs
manifest: MutableJSON
"""
Each item of the `manifest` attribute's value has this shape:
{
Expand Down Expand Up @@ -62,7 +61,7 @@ def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'Bundle':
metadata = json_['metadata']
links = json_['links']
stitched = json_['stitched']
assert isinstance(manifest, list), manifest
assert isinstance(manifest, dict), manifest
assert isinstance(metadata, dict), metadata
assert isinstance(links, dict), links
assert isinstance(stitched, list), stitched
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/repository/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def list_bundles(self,
def fetch_bundle(self, bundle_fqid: DSSBundleFQID) -> DSSBundle:
assert False, 'DSS is EOL'
# noinspection PyUnreachableCode
return DSSBundle(fqid=bundle_fqid, manifest=[], metadata={}, links={})
return DSSBundle(fqid=bundle_fqid, manifest={}, metadata={}, links={})

Check warning

Code scanning / CodeQL

Unreachable code Warning

This statement is unreachable.

def dss_subscription_query(self, prefix: str) -> JSON:
return {
Expand Down
11 changes: 6 additions & 5 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ def add_entity(self,
self.stitched.add(entity.entity_id)
if entity.entity_type.endswith('_file'):
descriptor = json.loads(row['descriptor'])
self._add_manifest_entry(name=row['file_name'],
self._add_manifest_entry(entity,
name=row['file_name'],
uuid=descriptor['file_id'],
version=descriptor['file_version'],
size=descriptor['size'],
Expand Down Expand Up @@ -224,6 +225,7 @@ def add_entity(self,
_suffix = 'tdr.'

def _add_manifest_entry(self,
entity: EntityReference,
*,
name: str,
uuid: str,
Expand All @@ -234,7 +236,7 @@ def _add_manifest_entry(self,
checksums: Optional[Checksums] = None,
drs_uri: Optional[str] = None) -> None:

self.manifest.append({
self.manifest[str(entity)] = {
'name': name,
'uuid': uuid,
'version': version,
Expand All @@ -251,7 +253,7 @@ def _add_manifest_entry(self,
**checksums.to_json()
}
)
})
}

def _parse_drs_uri(self,
file_id: Optional[str],
Expand Down Expand Up @@ -327,7 +329,7 @@ def _query_unique_sorted(self,

def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle:
bundle = TDRHCABundle(fqid=bundle_fqid,
manifest=[],
manifest={},
metadata={},
links={})
entities, root_entities, links_jsons = self._stitch_bundles(bundle)
Expand Down Expand Up @@ -357,7 +359,6 @@ def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle:
log.error('TDR worker failed to retrieve entities of type %r',
entity_type, exc_info=e)
raise e
bundle.manifest.sort(key=itemgetter('uuid'))
return bundle

def _stitch_bundles(self,
Expand Down
35 changes: 18 additions & 17 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from azul.collections import (
OrderedSet,
adict,
dict_merge,
)

Expand All @@ -45,7 +46,6 @@
from azul.types import (
JSON,
MutableJSON,
MutableJSONs,
is_optional,
)
from humancellatlas.data.metadata.age_range import (
Expand Down Expand Up @@ -749,7 +749,7 @@ class File(LinkedEntity):

def __init__(self,
json: JSON,
manifest: Mapping[str, ManifestEntry]):
manifest_entry: ManifestEntry):
super().__init__(json)
content = json.get('content', json)
# '/' was once forbidden in file paths and was encoded with '!'. Now
Expand All @@ -758,7 +758,7 @@ def __init__(self,
core = content['file_core']
core['file_name'] = core['file_name'].replace('!', '/')
self.format = lookup(core, 'format', 'file_format')
self.manifest_entry = manifest[core['file_name']]
self.manifest_entry = manifest_entry
self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])}
self.file_source = core.get('file_source')
self.from_processes = dict()
Expand Down Expand Up @@ -791,8 +791,8 @@ class SequenceFile(File):

def __init__(self,
json: JSON,
manifest: Mapping[str, ManifestEntry]):
super().__init__(json, manifest)
manifest_entry: ManifestEntry):
super().__init__(json, manifest_entry)
content = json.get('content', json)
self.read_index = content['read_index']
self.lane_index = content.get('lane_index')
Expand All @@ -809,8 +809,8 @@ class AnalysisFile(File):

def __init__(self,
json: JSON,
manifest: Mapping[str, ManifestEntry]):
super().__init__(json, manifest)
manifest_entry: ManifestEntry):
super().__init__(json, manifest_entry)
content = json.get('content', json)
self.matrix_cell_count = content.get('matrix_cell_count')

Expand Down Expand Up @@ -916,37 +916,38 @@ class Bundle:
def __init__(self,
uuid: str,
version: str,
manifest: MutableJSONs,
metadata: Mapping[str, JSON],
manifest: Mapping[str, MutableJSON],
metadata: Mapping[str, MutableJSON],
links_json: JSON,
stitched_entity_ids: AbstractSet[str] = frozenset()):
self.uuid = UUID4(uuid)
self.version = version
self.manifest = {m.name: m for m in map(ManifestEntry, manifest)}
self.manifest = {ref: ManifestEntry(e) for ref, e in manifest.items()}
self.stitched = stitched_entity_ids

json_by_core_cls: MutableMapping[type[E], list[JSON]] = defaultdict(list)
entity_args_by_core_cls: MutableMapping[type[E], list[dict]] = defaultdict(list)
for key, json in metadata.items():
schema_name = EntityReference.parse(key).entity_type
entity_cls = entity_types[schema_name]
core_cls = core_types[entity_cls]
json_by_core_cls[core_cls].append(json)
args = adict(json=json,
manifest_entry=self.manifest.get(key))
entity_args_by_core_cls[core_cls].append(args)

def from_json_vx(core_cls: type[E],
**kwargs
) -> MutableMapping[UUID4, E]:
json_entities = json_by_core_cls[core_cls]
args_list = entity_args_by_core_cls[core_cls]
entities = (
core_cls.from_json(entity, **kwargs)
for entity in json_entities
core_cls.from_json(**args)
for args in args_list
)
return {entity.document_id: entity for entity in entities}

self.projects = from_json_vx(Project)
self.biomaterials = from_json_vx(Biomaterial)
self.processes = from_json_vx(Process)
self.protocols = from_json_vx(Protocol)
self.files = from_json_vx(File, manifest=self.manifest)
self.files = from_json_vx(File)

self.entities = {**self.projects, **self.biomaterials, **self.processes, **self.protocols, **self.files}

Expand Down
7 changes: 3 additions & 4 deletions src/humancellatlas/data/metadata/helpers/staging_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from azul.types import (
JSON,
MutableJSON,
MutableJSONs,
)
from humancellatlas.data.metadata.api import (
Bundle,
Expand Down Expand Up @@ -147,12 +146,12 @@ def get_bundle(self, subgraph_id: str) -> Bundle:
version, manifest, metadata, links = self.get_bundle_parts(subgraph_id)
return Bundle(subgraph_id, version, manifest, metadata, links)

def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, MutableJSON, MutableJSON]:
def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSON, MutableJSON, MutableJSON]:
"""
Return the components to create a bundle from the staging area
"""
links_file = self.links[subgraph_id]
manifest = []
manifest = {}
metadata = {}
entity_ids_by_type = self._entity_ids_by_type(subgraph_id)
for entity_type, entity_ids in entity_ids_by_type.items():
Expand All @@ -164,7 +163,7 @@ def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, Mutable
metadata[key] = json_content
if entity_type.endswith('_file'):
file_manifest = self.descriptors[entity_id].manifest_entry
manifest.append(file_manifest)
manifest[key] = file_manifest
else:
pass
return links_file.version, manifest, metadata, links_file.content
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/36d7f891-8a43-4ae4-8472-a34dcb2be643": {
"crc32c": "29B559F9",
"sha1": "0587673d7d88efa40a6f4d25d34d2eba14cf8e8b",
"sha256": "df8b88b6f32f494a53687f79724a6ebd8bbb9c5c0474b9a857d467a0bdb83c2d",
Expand All @@ -12,7 +12,7 @@
"uuid": "c8a4ea32-6d66-48f3-b480-9421743b9c0a",
"version": "1"
}
],
},
"metadata": {
"cell_suspension/7b53bae2-2424-44c0-9d80-ad72e8bca136": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/a5806f2e-3f85-486a-9015-e02e5c805285": {
"crc32c": "E0404A01",
"sha1": "2d85a788acb4a291d5d13fb918dfbb25d4205de9",
"sha256": "895c57e9ca2bccbcd306086fd691c55b8e9303a864f1791d9a924b599a3c5313",
Expand All @@ -12,7 +12,7 @@
"uuid": "6ac13e04-d123-42de-bed9-f874b0d2fed2",
"version": "1"
}
],
},
"metadata": {
"cell_suspension/cc0d9bf0-6ad5-4489-994b-db26ff761c5a": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/740b2221-41c6-498d-87de-8b2937a5ebed": {
"crc32c": "E9B38AA1",
"sha1": "0cec1204162ddfbd3782a1aedce104b373163a9f",
"sha256": "3f29407965e542a4787b6ab1f32ea3f4162017dc88e294ab3e48ddfe76d476e7",
Expand All @@ -12,7 +12,7 @@
"uuid": "b9f26dd4-dddc-426d-97bb-674e8a0a26a2",
"version": "1"
},
{
"sequence_file/a3f614b3-e6cf-4751-a15d-ff623efea62a": {
"crc32c": "59378C79",
"sha1": "d982fed4c2cfd402a145637f1bc1d6364745a506",
"sha256": "ff3118605c52fd9cb96cb17b96ecc8ec74b72cb75558305e8630568478c03547",
Expand All @@ -24,7 +24,7 @@
"uuid": "1af1df49-a772-4645-bc0e-e8a599ab7121",
"version": "1"
},
{
"sequence_file/56c2a158-e389-40a3-97a7-0f2966a393b8": {
"crc32c": "8F96945F",
"sha1": "9cc9baf78624edf0e2c39e48ab7f559f32cfd7ca",
"sha256": "5a4bc638c31346d1fc4cde4cc8bfaf2341e486ba377edbeafb42a12d49ab6ddc",
Expand All @@ -36,7 +36,7 @@
"uuid": "53c4d13c-d3d3-4294-affe-fd1fecf9d1ef",
"version": "1"
},
{
"supplementary_file/a0eb3208-97e4-498c-993b-20757231b233": {
"crc32c": "62D4DB03",
"sha1": "32df780ee719286fccafae4d715607d4f05f3c40",
"sha256": "6daea9af107bd45e2666ca2ccd1a2a3d98c42b892a7a783093b638508eccb158",
Expand All @@ -48,7 +48,7 @@
"uuid": "c7555cbd-d66c-4f9a-b3f4-23e013da6910",
"version": "1"
},
{
"supplementary_file/e01ee650-58b3-4f3c-8da8-4483700a1eae": {
"crc32c": "B9D38666",
"sha1": "095ce1889337b2d8dd5ba3ce21ad85903ebda004",
"sha256": "a8b3535da8ba62f83c31b238c86a6f4b9848f913c6d35824cef5f955748f1ab9",
Expand All @@ -60,7 +60,7 @@
"uuid": "b5bc3187-6553-4410-b589-ac0f3bb6858f",
"version": "1"
},
{
"supplementary_file/5ab06a61-c66b-4d3d-9889-ab47371a65b0": {
"crc32c": "47985515",
"sha1": "b12c361c6d4ebb7c255ddb3e67e3babbd1fe0e25",
"sha256": "492641dd1bc0004c879d9fe8599e03a455612892df4802f5a1e9c860216777f6",
Expand All @@ -72,7 +72,7 @@
"uuid": "28aa156f-3eee-481b-8849-ab68ddf4b67d",
"version": "1"
}
],
},
"metadata": {
"cell_line/304fadde-e22a-4ff9-9544-f8ec097b6135": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.2/cell_line",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/60471337-a47b-4b9c-95e7-4a19349a5e05": {
"crc32c": "3D02CDD9",
"sha1": "b2dad8a028624c456ba43f1f97ac20bba430af74",
"sha256": "730386f5383a1c82f114ba6cc9ff381f2c47872d6edc4f1342b0b9df9a282795",
Expand All @@ -12,7 +12,7 @@
"uuid": "32d60a94-9f0f-44e3-83c6-eb2d1c1177bd",
"version": "1"
}
],
},
"metadata": {
"cell_suspension/1200f5bf-7d45-4f26-865e-b560797f1808": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/b93897c4-0681-407a-bc0c-fb791b919fa4": {
"crc32c": "4AEB011B",
"sha1": "7d5358d6a69ffc7e92ae1bae5e4ae6b9467bc8c5",
"sha256": "d184ee1517a86acdf92d35e911db3effef0630ab40ef062b094d456d4e6084e1",
Expand All @@ -12,7 +12,7 @@
"uuid": "b2b4819a-e22d-4ed4-a5c8-c04a1dec79eb",
"version": "1"
}
],
},
"metadata": {
"cell_suspension/1446ca36-ba75-45ea-b6ab-a80641a88812": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"manifest": [
{
"manifest": {
"sequence_file/baf745cd-9052-4a6c-8c1a-919390062c09": {
"crc32c": "30F94925",
"sha1": "472dd1bf4a1e942cbfc6ad79687e2a2ca8f72c74",
"sha256": "b474913c2db1d26a3d8ab9f37c59ac7e1768b2c628b4e250d0dae0c942adc677",
Expand All @@ -12,7 +12,7 @@
"uuid": "a4f16d4f-cdd7-46f4-8ead-42155d059f23",
"version": "1"
},
{
"sequence_file/0494ee09-b1e2-437a-986f-06d5df4a6858": {
"crc32c": "3727801A",
"sha1": "98b1bcf334aae2a06b973565497b16b092b5fc7e",
"sha256": "a58cc9dff7f585b8ddc40c56c60219b9c61f76fe210e357d2761ca6cea0693f9",
Expand All @@ -24,7 +24,7 @@
"uuid": "26f05d68-8700-40a4-8c5f-173cad377f22",
"version": "1"
}
],
},
"metadata": {
"cell_suspension/c61125ab-d5e0-4d93-b0a7-2deac1729f65": {
"describedBy": "http://schema.dev.data.humancellatlas.org/type/biomaterial/8.6.1/cell_suspension",
Expand Down
Loading

0 comments on commit 7cf9eb2

Please sign in to comment.