Skip to content

Commit

Permalink
Extract links.json to a separate bundle attribute
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc authored and hannes-ucsc committed Aug 29, 2024
1 parent a548ffd commit 8a9138f
Show file tree
Hide file tree
Showing 82 changed files with 77,428 additions and 77,404 deletions.
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def transformers(self,
api_bundle = api.Bundle(uuid=bundle.uuid,
version=bundle.version,
manifest=bundle.manifest,
metadata_files=bundle.metadata_files)
metadata_files=bundle.metadata_files,
links_json=bundle.links)

def transformers():
for transformer_cls in self.transformer_types():
Expand Down
9 changes: 7 additions & 2 deletions src/azul/plugins/metadata/hca/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,26 @@ class HCABundle(Bundle[BUNDLE_FQID], ABC):
}
"""
metadata_files: MutableJSON
links: MutableJSON

def reject_joiner(self, catalog: CatalogName):
self._reject_joiner(self.manifest)
self._reject_joiner(self.metadata_files)
self._reject_joiner(self.links)

def to_json(self) -> MutableJSON:
return {
'manifest': self.manifest,
'metadata': self.metadata_files
'metadata': self.metadata_files,
'links': self.links,
}

@classmethod
def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'Bundle':
manifest = json_['manifest']
metadata = json_['metadata']
links = json_['links']
assert isinstance(manifest, list), manifest
assert isinstance(metadata, dict), metadata
return cls(fqid=fqid, manifest=manifest, metadata_files=metadata)
assert isinstance(links, dict), links
return cls(fqid=fqid, manifest=manifest, metadata_files=metadata, links=links)
2 changes: 1 addition & 1 deletion src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ def _add_replica(self,
if not config.enable_replicas:
replica = None
elif self.entity_type() == 'bundles':
links = self.bundle.metadata_files['links.json']
links = self.bundle.links
replica = self._replica(links, entity_ref, hub_ids)
else:
assert isinstance(entity, api.Entity), entity
Expand Down
5 changes: 3 additions & 2 deletions src/azul/plugins/repository/canned/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,15 @@ def fetch_bundle(self, bundle_fqid: CannedBundleFQID) -> CannedBundle:
self._assert_source(bundle_fqid.source)
now = time.time()
staging_area = self.staging_area(bundle_fqid.source.spec.name)
version, manifest, metadata = staging_area.get_bundle_parts(bundle_fqid.uuid)
version, manifest, metadata, links = staging_area.get_bundle_parts(bundle_fqid.uuid)
if bundle_fqid.version is None:
bundle_fqid = CannedBundleFQID(source=bundle_fqid.source,
uuid=bundle_fqid.uuid,
version=version)
bundle = CannedBundle(fqid=bundle_fqid,
manifest=manifest,
metadata_files=metadata)
metadata_files=metadata,
links=links)
assert version == bundle.version, (version, bundle)
log.info('It took %.003fs to download bundle %s.%s',
time.time() - now, bundle.uuid, bundle.version)
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/repository/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def list_bundles(self,
def fetch_bundle(self, bundle_fqid: DSSBundleFQID) -> DSSBundle:
assert False, 'DSS is EOL'
# noinspection PyUnreachableCode
return DSSBundle(fqid=bundle_fqid, manifest=[], metadata_files={})
return DSSBundle(fqid=bundle_fqid, manifest=[], metadata_files={}, links={})

def dss_subscription_query(self, prefix: str) -> JSON:
return {
Expand Down
5 changes: 3 additions & 2 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,10 @@ def _query_unique_sorted(self,
def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle:
bundle = TDRHCABundle(fqid=bundle_fqid,
manifest=[],
metadata_files={})
metadata_files={},
links={})
entities, root_entities, links_jsons = self._stitch_bundles(bundle)
bundle.metadata_files['links.json'] = self._merge_links(links_jsons)
bundle.links = self._merge_links(links_jsons)

with ThreadPoolExecutor(max_workers=config.num_tdr_workers) as executor:
futures = {
Expand Down
8 changes: 6 additions & 2 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,12 @@ class Bundle:
entities: MutableMapping[UUID4, Entity] = field(repr=False)
links: list[Link]

def __init__(self, uuid: str, version: str, manifest: MutableJSONs, metadata_files: Mapping[str, JSON]):
def __init__(self,
uuid: str,
version: str,
manifest: MutableJSONs,
metadata_files: Mapping[str, JSON],
links_json: JSON):
self.uuid = UUID4(uuid)
self.version = version
self.manifest = {m.name: m for m in map(ManifestEntry, manifest)}
Expand Down Expand Up @@ -1026,7 +1031,6 @@ def from_json_vx(core_cls: type[E],

self.entities = {**self.projects, **self.biomaterials, **self.processes, **self.protocols, **self.files}

links_json = metadata_files['links.json']
schema_version = tuple(map(int, links_json['schema_version'].split('.')))
self.links = list(chain.from_iterable(
Link.from_json(link, schema_version)
Expand Down
12 changes: 5 additions & 7 deletions src/humancellatlas/data/metadata/helpers/staging_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,16 @@ def get_bundle(self, subgraph_id: str) -> Bundle:
"""
Return a bundle from the staging area
"""
version, manifest, metadata = self.get_bundle_parts(subgraph_id)
return Bundle(subgraph_id, version, manifest, metadata)
version, manifest, metadata, links = self.get_bundle_parts(subgraph_id)
return Bundle(subgraph_id, version, manifest, metadata, links)

def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, MutableJSON]:
def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, MutableJSON, MutableJSON]:
"""
Return the components to create a bundle from the staging area
"""
links_file = self.links[subgraph_id]
manifest = []
metadata = {
'links.json': links_file.content
}
metadata = {}
entity_ids_by_type = self._entity_ids_by_type(subgraph_id)
for entity_type, entity_ids in entity_ids_by_type.items():
# Sort entity_ids to produce the same ordering on multiple runs
Expand All @@ -179,7 +177,7 @@ def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, Mutable
manifest.append(file_manifest)
else:
pass
return links_file.version, manifest, metadata
return links_file.version, manifest, metadata, links_file.content

def _entity_ids_by_type(self,
subgraph_id: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,63 +281,6 @@
"update_date": "2018-09-04T12:27:50.909Z"
}
},
"links.json": {
"describedBy": "https://schema.humancellatlas.org/system/1.1.1/links",
"schema_type": "link_bundle",
"schema_version": "1.1.1",
"links": [
{
"process": "d691bda4-ed01-48b6-a4ea-65b70f6a3946",
"inputs": [
"7b53bae2-2424-44c0-9d80-ad72e8bca136"
],
"input_type": "biomaterial",
"outputs": [
"36d7f891-8a43-4ae4-8472-a34dcb2be643"
],
"output_type": "file",
"protocols": [
{
"protocol_type": "library_preparation_protocol",
"protocol_id": "e4024c4a-dbce-4bda-bed8-21414091e7ce"
},
{
"protocol_type": "sequencing_protocol",
"protocol_id": "aaa08845-5150-4a4f-9c44-5ea22add1fc3"
}
]
},
{
"process": "98442f49-9afb-491e-8347-f891f39d8d70",
"inputs": [
"6228558b-436a-46c9-9cd3-ea9b5c123070"
],
"input_type": "biomaterial",
"outputs": [
"7b53bae2-2424-44c0-9d80-ad72e8bca136"
],
"output_type": "biomaterial",
"protocols": [
{
"protocol_type": "dissociation_protocol",
"protocol_id": "eebf404f-4fbb-41b0-a9c6-81586f729599"
}
]
},
{
"process": "5aa4645b-2802-4140-9b15-d1008338b1c9",
"inputs": [
"628e8b1d-a1ce-4dee-b15a-3fd33290eafe"
],
"input_type": "biomaterial",
"outputs": [
"6228558b-436a-46c9-9cd3-ea9b5c123070"
],
"output_type": "biomaterial",
"protocols": []
}
]
},
"process_0.json": {
"process_core": {
"process_id": "process_id_128"
Expand Down Expand Up @@ -591,5 +534,62 @@
"update_date": "2018-09-04T12:27:13.378Z"
}
}
},
"links": {
"describedBy": "https://schema.humancellatlas.org/system/1.1.1/links",
"schema_type": "link_bundle",
"schema_version": "1.1.1",
"links": [
{
"process": "d691bda4-ed01-48b6-a4ea-65b70f6a3946",
"inputs": [
"7b53bae2-2424-44c0-9d80-ad72e8bca136"
],
"input_type": "biomaterial",
"outputs": [
"36d7f891-8a43-4ae4-8472-a34dcb2be643"
],
"output_type": "file",
"protocols": [
{
"protocol_type": "library_preparation_protocol",
"protocol_id": "e4024c4a-dbce-4bda-bed8-21414091e7ce"
},
{
"protocol_type": "sequencing_protocol",
"protocol_id": "aaa08845-5150-4a4f-9c44-5ea22add1fc3"
}
]
},
{
"process": "98442f49-9afb-491e-8347-f891f39d8d70",
"inputs": [
"6228558b-436a-46c9-9cd3-ea9b5c123070"
],
"input_type": "biomaterial",
"outputs": [
"7b53bae2-2424-44c0-9d80-ad72e8bca136"
],
"output_type": "biomaterial",
"protocols": [
{
"protocol_type": "dissociation_protocol",
"protocol_id": "eebf404f-4fbb-41b0-a9c6-81586f729599"
}
]
},
{
"process": "5aa4645b-2802-4140-9b15-d1008338b1c9",
"inputs": [
"628e8b1d-a1ce-4dee-b15a-3fd33290eafe"
],
"input_type": "biomaterial",
"outputs": [
"6228558b-436a-46c9-9cd3-ea9b5c123070"
],
"output_type": "biomaterial",
"protocols": []
}
]
}
}
Loading

0 comments on commit 8a9138f

Please sign in to comment.