From 5b87984915b798dcc5052ab8f6f5ed99b5b245d6 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Fri, 16 Aug 2024 18:51:19 -0700 Subject: [PATCH] Remove synthetic file names from TDR bundle manifest entries --- .../plugins/repository/tdr_hca/__init__.py | 8 +-- ...8-d6e9-406a-aa6a-7ee886e52bf9.tdr.hca.json | 58 +++++++++---------- ...5-b3c5-5aab-ab86-51d8ce44dfbe.tdr.hca.json | 40 ++++++------- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py index 0cb76e5a3..401dbde71 100644 --- a/src/azul/plugins/repository/tdr_hca/__init__.py +++ b/src/azul/plugins/repository/tdr_hca/__init__.py @@ -184,14 +184,13 @@ def canning_qualifier(cls) -> str: def add_entity(self, *, - entity_key: str, entity: EntityReference, row: BigQueryRow, is_stitched: bool ) -> None: if is_stitched: self.stitched.add(entity.entity_id) - self._add_manifest_entry(name=entity_key, + self._add_manifest_entry(name='', uuid=entity.entity_id, version=TDRPlugin.format_version(row['version']), size=row['content_size'], @@ -356,11 +355,10 @@ def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle: rows = future.result() pk_column = entity_type + '_id' rows.sort(key=itemgetter(pk_column)) - for i, row in enumerate(rows): + for row in rows: entity = EntityReference(entity_id=row[pk_column], entity_type=entity_type) is_stitched = entity not in root_entities - bundle.add_entity(entity_key=f'{entity_type}_{i}.json', - entity=entity, + bundle.add_entity(entity=entity, row=row, is_stitched=is_stitched) else: diff --git a/test/indexer/data/1b6d8348-d6e9-406a-aa6a-7ee886e52bf9.tdr.hca.json b/test/indexer/data/1b6d8348-d6e9-406a-aa6a-7ee886e52bf9.tdr.hca.json index b942c7df5..38715f55a 100644 --- a/test/indexer/data/1b6d8348-d6e9-406a-aa6a-7ee886e52bf9.tdr.hca.json +++ b/test/indexer/data/1b6d8348-d6e9-406a-aa6a-7ee886e52bf9.tdr.hca.json @@ -12,7 +12,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_1d239f78-f3e6-5dfb-aeb0-f50d8d97b51c" }, { - "name": "sequencing_protocol_0.json", + "name": "", "uuid": "08b09d15-ef99-4278-9b88-65127adb51b9", "version": "2019-09-20T13:43:51.921000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -35,7 +35,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_c82292d3-61d1-5785-b8dc-24b9365df5bd" }, { - "name": "project_0.json", + "name": "", "uuid": "116965f3-f094-4769-9d28-ae675c1b569c", "version": "2019-10-03T10:54:38.894000Z", "content-type": "application/json; dcp-type=\"metadata/project\"", @@ -45,7 +45,7 @@ "sha256": "" }, { - "name": "sequence_file_0.json", + "name": "", "uuid": "133f2a25-f3aa-49a7-b9b6-d185d302fb26", "version": "2019-09-20T13:51:28.379000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -55,7 +55,7 @@ "sha256": "" }, { - "name": "process_0.json", + "name": "", "uuid": "15de6ce1-c185-4852-ade4-43614c5d048b", "version": "2019-09-20T13:43:52.606000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -102,7 +102,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_83e89c54-4fc6-58b0-800c-3788539c41e7" }, { - "name": "supplementary_file_0.json", + "name": "", "uuid": "22561f93-d5e0-57eb-a54e-2d9d565cc234", "version": "2019-09-24T09:35:06.958773Z", "size": 353, @@ -112,7 +112,7 @@ "sha256": "" }, { - "name": "sequence_file_1.json", + "name": "", "uuid": "23f8e139-f4fa-4476-ba59-ff5d37003c34", "version": "2019-09-20T13:50:08.455000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -122,7 +122,7 @@ "sha256": "" }, { - "name": "sequence_file_2.json", + "name": "", "uuid": "27fc1a2e-d70e-47ee-a4b7-92bf57e5b7a6", "version": "2019-09-20T13:50:35.338000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -145,7 +145,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_dba31e1c-1b9c-56ae-af74-16b00b8144d2" }, { - "name": "library_preparation_protocol_0.json", + "name": "", "uuid": "2945bb1f-90de-42a3-afa1-f57a62c853f0", "version": "2019-09-20T13:43:52.178000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -155,7 +155,7 @@ "sha256": "" }, { - "name": "donor_organism_0.json", + "name": "", "uuid": "2b7adb0a-82a4-4319-80d1-4a73d879dec1", "version": "2019-09-20T13:43:52.612000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -165,7 +165,7 @@ "sha256": "" }, { - "name": "enrichment_protocol_0.json", + "name": "", "uuid": "345bcbd2-f3a7-4f57-a806-fbb0ce4a25da", "version": "2019-09-20T13:43:52.381000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -175,7 +175,7 @@ "sha256": "" }, { - "name": "sequence_file_3.json", + "name": "", "uuid": "35e07477-76cb-4a92-99b6-7b3bbb02ec06", "version": "2019-09-20T13:49:42.254000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -185,7 +185,7 @@ "sha256": "" }, { - "name": "sequence_file_4.json", + "name": "", "uuid": "3c3d882a-cc38-4eea-ac53-0885f906852e", "version": "2019-09-20T13:52:19.666000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -195,7 +195,7 @@ "sha256": "" }, { - "name": "sequence_file_5.json", + "name": "", "uuid": "3dffc456-caaf-4823-9f65-fc0db7abacbc", "version": "2019-09-20T13:49:42.771000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -205,7 +205,7 @@ "sha256": "" }, { - "name": "process_1.json", + "name": "", "uuid": "47ca0565-ab7d-42bd-8731-ce5d30cfd6c7", "version": "2019-09-20T13:43:52.438000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -215,7 +215,7 @@ "sha256": "" }, { - "name": "supplementary_file_1.json", + "name": "", "uuid": "507d2814-1688-54e7-b73e-2f831aa34368", "version": "2019-09-24T09:35:06.958773Z", "size": 353, @@ -236,7 +236,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_9d6f268f-f484-5381-9095-f0998fa0c961" }, { - "name": "sequence_file_6.json", + "name": "", "uuid": "5f68ea73-350b-4d37-a341-3004139fb699", "version": "2019-09-20T13:50:35.660000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -246,7 +246,7 @@ "sha256": "" }, { - "name": "sequence_file_7.json", + "name": "", "uuid": "63051b9f-87d4-4853-a4a0-5cc742ef6e72", "version": "2019-09-20T13:50:59.836000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -256,7 +256,7 @@ "sha256": "" }, { - "name": "sequence_file_8.json", + "name": "", "uuid": "6ca8e43f-5f40-4eb9-bfa9-294c54dbeade", "version": "2019-09-20T13:52:17.560000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -266,7 +266,7 @@ "sha256": "" }, { - "name": "cell_line_0.json", + "name": "", "uuid": "70054038-cb12-429d-bb37-69185ff1deee", "version": "2019-09-20T13:43:52.455000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -276,7 +276,7 @@ "sha256": "" }, { - "name": "collection_protocol_0.json", + "name": "", "uuid": "8612862e-beb1-416b-a807-0a83fae8d168", "version": "2019-09-20T13:43:52.146000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -299,7 +299,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_a5e4e698-1d4c-5a5d-8d7c-5c84ea0f30d8" }, { - "name": "cell_suspension_0.json", + "name": "", "uuid": "906f8f09-deaa-40bb-bf50-27d2336b2dc4", "version": "2019-09-20T13:43:52.387000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -309,7 +309,7 @@ "sha256": "" }, { - "name": "sequence_file_9.json", + "name": "", "uuid": "98ee0901-d34c-48a5-915a-0511bac325ec", "version": "2019-09-20T13:53:16.941000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -319,7 +319,7 @@ "sha256": "" }, { - "name": "sequence_file_10.json", + "name": "", "uuid": "a2f4b05d-0dae-4655-acfd-ef3b91ae0ad0", "version": "2019-09-20T13:50:08.492000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -342,7 +342,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_065dad63-3e05-5738-8ba5-9925d5d73d5e" }, { - "name": "supplementary_file_2.json", + "name": "", "uuid": "c343a47d-683f-571d-99c4-1331841b4e63", "version": "2019-09-24T09:35:06.958773Z", "size": 353, @@ -378,7 +378,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_e6c1d2ce-d6ad-500e-8682-626a555992ab" }, { - "name": "process_2.json", + "name": "", "uuid": "c5cf9180-535a-4d1c-bdee-4f6ce7845b1e", "version": "2019-09-20T13:43:52.368000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -388,7 +388,7 @@ "sha256": "" }, { - "name": "specimen_from_organism_0.json", + "name": "", "uuid": "d6a518a8-0c5d-4cb0-aed5-68f3455c2bda", "version": "2019-09-20T13:43:52.654000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -411,7 +411,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_bb455f5a-a70f-5155-85a0-46f475856e9f" }, { - "name": "process_3.json", + "name": "", "uuid": "df5114c9-9f52-451b-afde-3673392fc864", "version": "2019-09-20T13:43:52.607000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -421,7 +421,7 @@ "sha256": "" }, { - "name": "sequence_file_11.json", + "name": "", "uuid": "e46af70a-0396-41db-9b74-330cb8244124", "version": "2019-09-20T13:52:19.118000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -444,7 +444,7 @@ "drs_uri": "drs://mock_tdr.lan/v1_d8c20944-739f-4e7d-9161-b720953432ce_f140bd59-16c2-5082-aaf6-1c929e7063c6" }, { - "name": "dissociation_protocol_0.json", + "name": "", "uuid": "eaf15851-97e3-4e4b-b81b-0e625098f4d5", "version": "2019-09-20T13:43:52.177000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", diff --git a/test/indexer/data/4426adc5-b3c5-5aab-ab86-51d8ce44dfbe.tdr.hca.json b/test/indexer/data/4426adc5-b3c5-5aab-ab86-51d8ce44dfbe.tdr.hca.json index ab314980f..a7ab1c01b 100644 --- a/test/indexer/data/4426adc5-b3c5-5aab-ab86-51d8ce44dfbe.tdr.hca.json +++ b/test/indexer/data/4426adc5-b3c5-5aab-ab86-51d8ce44dfbe.tdr.hca.json @@ -1,7 +1,7 @@ { "manifest": [ { - "name": "supplementary_file_0.json", + "name": "", "uuid": "01a1d04b-05d0-4904-b627-68b0dc02bc17", "version": "2019-05-14T11:01:09.564000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -11,7 +11,7 @@ "sha256": "" }, { - "name": "analysis_protocol_0.json", + "name": "", "uuid": "099457a1-a453-54c6-aae5-b70dafa33bbd", "version": "2020-08-10T21:24:26.174274Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -21,7 +21,7 @@ "sha256": "" }, { - "name": "project_0.json", + "name": "", "uuid": "116965f3-f094-4769-9d28-ae675c1b569c", "version": "2019-10-03T10:54:38.894000Z", "content-type": "application/json; dcp-type=\"metadata/project\"", @@ -31,7 +31,7 @@ "sha256": "" }, { - "name": "cell_suspension_0.json", + "name": "", "uuid": "201ae4d0-c7c6-4777-947f-18f5268884b6", "version": "2019-05-14T12:06:23.503000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -52,7 +52,7 @@ "sha256": "1bb86ee570e328dcd72a684689ddd87c966209cfc4b2b5c36e50dd0e4c156cab" }, { - "name": "donor_organism_0.json", + "name": "", "uuid": "3211ef8f-a60c-4892-ae55-91d6752dcd75", "version": "2019-05-14T11:29:52.575000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -62,7 +62,7 @@ "sha256": "" }, { - "name": "process_0.json", + "name": "", "uuid": "33e31f6f-fe78-45bd-ae57-10d2d14253cf", "version": "2019-05-14T12:29:57.858000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -72,7 +72,7 @@ "sha256": "" }, { - "name": "library_preparation_protocol_0.json", + "name": "", "uuid": "5b503dcb-dca6-4e4f-988b-f7100c030dc5", "version": "2019-05-14T11:06:42.648000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -82,7 +82,7 @@ "sha256": "" }, { - "name": "specimen_from_organism_0.json", + "name": "", "uuid": "5d82b884-7db4-47fa-9ccd-03054065c509", "version": "2019-05-14T11:59:17.317000Z", "content-type": "application/json; dcp-type=\"metadata/biomaterial\"", @@ -105,7 +105,7 @@ "s3_etag": "7e892bf8f6aa489ccb08a995c7f017e1" }, { - "name": "process_1.json", + "name": "", "uuid": "6f5ddf2c-382d-4c0a-a506-3a5286067c83", "version": "2019-05-14T12:20:34.464000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -115,7 +115,7 @@ "sha256": "" }, { - "name": "analysis_file_0.json", + "name": "", "uuid": "7a015307-6db9-541f-aaae-db9fdb41000f", "version": "2020-08-10T21:24:26.174274Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -125,7 +125,7 @@ "sha256": "" }, { - "name": "analysis_process_0.json", + "name": "", "uuid": "848c816e-4ae8-4c9a-9e3d-ebe447519ff4", "version": "2020-08-10T21:24:26.174274Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -135,7 +135,7 @@ "sha256": "" }, { - "name": "sequence_file_0.json", + "name": "", "uuid": "8f8b9587-237f-4995-9461-c96eac53d615", "version": "2019-05-14T12:18:53.532000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -145,7 +145,7 @@ "sha256": "" }, { - "name": "supplementary_file_1.json", + "name": "", "uuid": "a06cb5d5-2675-4d64-aeb8-79e0103715f3", "version": "2019-05-14T11:01:09.596000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -155,7 +155,7 @@ "sha256": "" }, { - "name": "analysis_file_1.json", + "name": "", "uuid": "b8b76328-854c-5862-a688-9b761a9cea86", "version": "2020-08-10T21:24:26.174274Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -178,7 +178,7 @@ "s3_etag": "2742e1e78f6d4663bf41d3080396695c" }, { - "name": "process_2.json", + "name": "", "uuid": "c0d18b81-0b54-4a11-864b-cd3f43b7e7bf", "version": "2019-05-14T12:26:53.666000Z", "content-type": "application/json; dcp-type=\"metadata/process\"", @@ -212,7 +212,7 @@ "s3_etag": "846fd9e6b98041df46a1ddb94e85b6b9" }, { - "name": "enrichment_protocol_0.json", + "name": "", "uuid": "dd7f3d64-4b79-48f0-9d8c-4324594fe820", "version": "2019-05-14T11:06:42.695000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -222,7 +222,7 @@ "sha256": "" }, { - "name": "supplementary_file_2.json", + "name": "", "uuid": "e738a267-87fc-4070-abc7-b3be6442c6d0", "version": "2019-05-14T11:01:15.816000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -245,7 +245,7 @@ "s3_etag": "8786a82f739d1848aa5be480faf65e70" }, { - "name": "sequencing_protocol_0.json", + "name": "", "uuid": "eca2ab79-ad61-411f-815a-4f6d936d992b", "version": "2019-05-14T11:06:42.648000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"", @@ -255,7 +255,7 @@ "sha256": "" }, { - "name": "sequence_file_1.json", + "name": "", "uuid": "ee7404a4-c183-4a75-8bd1-34b5fc140e81", "version": "2019-05-14T12:13:41.926000Z", "content-type": "application/json; dcp-type=\"metadata/file\"", @@ -278,7 +278,7 @@ "s3_etag": "6aca4df2ef18b97243967887837c3bef" }, { - "name": "dissociation_protocol_0.json", + "name": "", "uuid": "fcba26aa-658c-4120-ab31-cc5a5a00f759", "version": "2019-05-14T11:06:42.781000Z", "content-type": "application/json; dcp-type=\"metadata/protocol\"",