From 924805f0146b81d42ece5c6c3e426cc05983823b Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Fri, 22 Mar 2024 18:54:58 -0700 Subject: [PATCH] fixup! Add JSONL-based verbatim manifest format (#6028) --- src/azul/plugins/__init__.py | 5 ++++- src/azul/service/manifest_service.py | 2 +- test/service/test_manifest.py | 10 +++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/azul/plugins/__init__.py b/src/azul/plugins/__init__.py index 44de233ad2..212f4d6733 100644 --- a/src/azul/plugins/__init__.py +++ b/src/azul/plugins/__init__.py @@ -409,7 +409,10 @@ def source_id_field(self) -> str: def implicit_hub_type(self) -> str: """ The type of entities that do not explicitly track their hubs in replica - documents. + documents in order to avoid a large list of hub references in the + replica document, and to avoid contention when updating that list during + indexing. Note that this is not a type of hub entities, but rather the + type of replica entities that have implicit hubs. """ raise NotImplementedError diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 8f5f496f20..b777765516 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -2096,7 +2096,7 @@ def _join_replicas(self, keys: Iterable[ReplicaKeys]) -> Iterable[Hit]: return request.scan() def create_file(self) -> tuple[str, Optional[str]]: - fd, path = mkstemp() + fd, path = mkstemp(suffix=f'.{self.file_name_extension()}') os.close(fd) with open(path, 'w') as f: for replica in self._all_replicas(): diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index f971fa9a1b..4e7736e960 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -1278,7 +1278,7 @@ def test_manifest_content_disposition_header(self): @manifest_test def test_verbatim_jsonl_manifest(self): bundle = self._load_canned_bundle(one(self.bundles())) - expected_contents = [ + expected = [ bundle.metadata_files[d] for d in [ 'cell_suspension_0.json', @@ -1290,14 +1290,14 @@ def test_verbatim_jsonl_manifest(self): ] response = self._get_manifest(ManifestFormat.verbatim_jsonl, {}) self.assertEqual(200, response.status_code) - response_contents = list(map(json.loads, response.content.decode().splitlines())) + response = list(map(json.loads, response.content.decode().splitlines())) def sort_key(hca_doc: JSON) -> str: return hca_doc['provenance']['document_id'] - expected_contents.sort(key=sort_key) - response_contents.sort(key=sort_key) - self.assertEqual(expected_contents, response_contents) + expected.sort(key=sort_key) + response.sort(key=sort_key) + self.assertEqual(expected, response) class TestManifestCache(ManifestTestCase):