From d057e78efbebc5c4e829beaef82e70e7a9f33eda Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Wed, 10 Apr 2024 00:50:41 -0700 Subject: [PATCH] Include `replica_type` in JSONL manifest --- src/azul/service/manifest_service.py | 8 ++++++-- test/service/test_manifest.py | 26 ++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 4962932b2..9d432be00 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -2067,7 +2067,7 @@ def _all_replicas(self) -> Iterable[JSON]: replica_id = replica.meta.id if replica_id not in emitted_replica_ids: num_new_replicas += 1 - yield replica.contents.to_dict() + yield replica.to_dict() # Note that this will be zero for replicas that use implicit # hubs, in which case there are actually many hubs explicit_hub_count = len(replica.hub_ids) @@ -2113,6 +2113,10 @@ def create_file(self) -> tuple[str, Optional[str]]: os.close(fd) with open(path, 'w') as f: for replica in self._all_replicas(): - json.dump(replica, f) + entry = { + 'contents': replica['contents'], + 'type': replica['replica_type'] + } + json.dump(entry, f) f.write('\n') return path, None diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index 18ef4928c..8c5ac5d5f 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -1317,15 +1317,18 @@ def test_manifest_content_disposition_header(self): @manifest_test def test_verbatim_jsonl_manifest(self): expected = [ - bundle.metadata_files[d] + { + 'type': replica_type, + 'contents': bundle.metadata_files[key], + } for bundle in map(self._load_canned_bundle, self.bundles()) - for d in [ - 'links.json', - 'cell_suspension_0.json', - 'project_0.json', - 'sequence_file_0.json', - 'sequence_file_1.json', - 'specimen_from_organism_0.json' + for replica_type, key in [ + ('links', 'links.json'), + ('cell_suspension', 'cell_suspension_0.json'), + ('project', 'project_0.json'), + ('file', 'sequence_file_0.json'), + ('file', 'sequence_file_1.json'), + ('sample', 'specimen_from_organism_0.json') ] ] response = self._get_manifest(ManifestFormat.verbatim_jsonl, {}) @@ -2058,8 +2061,11 @@ def test_verbatim_jsonl_manifest(self): response = self._get_manifest(ManifestFormat.verbatim_jsonl, filters={}) self.assertEqual(200, response.status_code) expected = [ - entity + { + 'type': 'anvil_' + entity_ref.entity_type, + 'contents': entity, + } for bundle in self.bundles() - for entity in self._load_canned_bundle(bundle).entities.values() + for entity_ref, entity in self._load_canned_bundle(bundle).entities.items() ] self._assert_jsonl(expected, response)