Skip to content

Commit

Permalink
[p] Remove manifest entries for HCA metadata entities (partial #6299)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Aug 30, 2024
1 parent 30c5704 commit 898dda5
Show file tree
Hide file tree
Showing 64 changed files with 9 additions and 44,154 deletions.
10 changes: 0 additions & 10 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,6 @@ def add_entity(self,
) -> None:
if is_stitched:
self.stitched.add(entity.entity_id)
self._add_manifest_entry(name='',
uuid=entity.entity_id,
version=TDRPlugin.format_version(row['version']),
size=row['content_size'],
content_type='application/json',
dcp_type=f'"metadata/{row["schema_type"]}"')
if entity.entity_type.endswith('_file'):
descriptor = json.loads(row['descriptor'])
self._add_manifest_entry(name=row['file_name'],
Expand All @@ -212,9 +206,6 @@ def add_entity(self,
else content)

metadata_columns: ClassVar[set[str]] = {
'version',
'JSON_EXTRACT_SCALAR(content, "$.schema_type") AS schema_type',
'BYTE_LENGTH(content) AS content_size',
'content'
}

Expand Down Expand Up @@ -462,7 +453,6 @@ def _retrieve_entities(self,
else TDRHCABundle.data_columns if entity_type.endswith('_file')
else TDRHCABundle.metadata_columns
)
assert version_column in non_pk_columns
table_name = backtick(self._full_table_name(source, entity_type))
entity_id_type = one(set(map(type, entity_ids)))

Expand Down
16 changes: 1 addition & 15 deletions src/humancellatlas/data/metadata/helpers/staging_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,25 +157,11 @@ def get_bundle_parts(self, subgraph_id: str) -> tuple[str, MutableJSONs, Mutable
entity_ids_by_type = self._entity_ids_by_type(subgraph_id)
for entity_type, entity_ids in entity_ids_by_type.items():
# Sort entity_ids to produce the same ordering on multiple runs
for i, entity_id in enumerate(sorted(entity_ids)):
json_file_name = f'{entity_type}_{i}.json'
for entity_id in sorted(entity_ids):
metadata_file = self.metadata[entity_id]
json_content = metadata_file.content
key = str(EntityReference(entity_type=entity_type, entity_id=entity_id))
metadata[key] = json_content
file_manifest = {
'content-type': 'application/json;',
'crc32c': '0' * 8,
'indexed': True,
'name': json_file_name,
's3_etag': None,
'sha1': None,
'sha256': '0' * 64,
'size': len(json.dumps(json_content)),
'uuid': metadata_file.uuid,
'version': metadata_file.version
}
manifest.append(file_manifest)
if entity_type.endswith('_file'):
file_manifest = self.descriptors[entity_id].manifest_entry
manifest.append(file_manifest)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,113 +1,5 @@
{
"manifest": [
{
"crc32c": "1C09FCE0",
"sha1": "f1aeb2b94ce28ee524388730c9b63b7dafc895c7",
"sha256": "b3f554280c892aecfc235a64f924843b2484f73cb0cd21aacdd1d796383a6d69",
"s3_etag": "bceb1c61bab478faf3f70e0ff832b4c0",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "cell_suspension_0.json",
"size": 770,
"uuid": "84354fad-8890-41df-b808-48f9ed4e1cc3",
"version": "1"
},
{
"crc32c": "28EA16F2",
"sha1": "7cc7ac34f3c2c00befb398a63b5303d636ca4711",
"sha256": "de429783e1308851f7f5e8320d8612e7c1ad90975698fec9a481c836db2e9543",
"s3_etag": "61eb8b831af8c1fdf1105ee9cb66fb16",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "dissociation_protocol_0.json",
"size": 711,
"uuid": "30cf9805-82f5-4725-a7ad-7411c276b4a9",
"version": "1"
},
{
"crc32c": "C2C08857",
"sha1": "8f1b7481a5dc77f3a6c90530a957952102a92e35",
"sha256": "94be68544f8a5a0171fd918e070150a29fa1c8f4380ec351a302bb56b980b491",
"s3_etag": "d98de4db8b42b8def2aa2591da2f8569",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "donor_organism_0.json",
"size": 1358,
"uuid": "491a22fb-3a6c-4a8f-9c5b-f009e080d280",
"version": "1"
},
{
"crc32c": "0B85C35C",
"sha1": "dd9a81c11165c742d94b1cb8561e3789854a8ca7",
"sha256": "e215aceb61560e510592eb1f4247c501ea0d756659bd781d995df0546ecb9696",
"s3_etag": "abc3a4a3dbec4beab0e2cacf50b831c4",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "library_preparation_protocol_0.json",
"size": 1087,
"uuid": "821601f1-64e8-4427-8680-34a743b35b69",
"version": "1"
},
{
"crc32c": "522D96A7",
"sha1": "9876adb67827ea0328566811a1304bcf49a31b02",
"sha256": "ecc1b1a2cd924933311c28e9b00f841c0beaa918f3c29722a36ff3fa6600440b",
"s3_etag": "4f939c7dae692b468e4741b56ccfbe3e",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_0.json",
"size": 386,
"uuid": "c4175b47-1767-48c5-b408-f9e97c01f94e",
"version": "1"
},
{
"crc32c": "51B043A0",
"sha1": "9b17c9cd8e3d7c3bc4685e24bbcba9e528792d05",
"sha256": "98d9878e05b6a56fed6d00153aa5c0664de07185994e2cf080ec1be4b4776933",
"s3_etag": "8ff1e91a29e51311d5252889599eb709",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_1.json",
"size": 386,
"uuid": "88b3b728-e9c6-4b1b-a030-97f6670469c2",
"version": "1"
},
{
"crc32c": "1FAD27D0",
"sha1": "b2a61619fa1922cb899d993aace4fa0f6b30a232",
"sha256": "31eab93a5c0e39c58ee16d16c0ce5331e2185cc271891fbd63e7f62a75336e4b",
"s3_etag": "a781ef10618d4bd5c11bf5bb923d4d12",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_2.json",
"size": 384,
"uuid": "317b3c35-e0f6-48d6-9154-80db47ff8f20",
"version": "1"
},
{
"crc32c": "68AEC241",
"sha1": "1296d3c9b98cef091f1aecd6d9df25fa7f780d5c",
"sha256": "c9cd608014bb3cfd7b0ab891cd597272f0e860f0d5e7f0eab38338d828ba2bec",
"s3_etag": "93f27be5baa191e55406b1b54af4bd1e",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "project_0.json",
"size": 6018,
"uuid": "a34d2ddc-1433-4a3c-b1de-ad3a26adca9e",
"version": "1"
},
{
"crc32c": "29B559F9",
"sha1": "0587673d7d88efa40a6f4d25d34d2eba14cf8e8b",
"sha256": "df8b88b6f32f494a53687f79724a6ebd8bbb9c5c0474b9a857d467a0bdb83c2d",
"s3_etag": "ef8edc4bf0ba07805a052cd7686b5ea7",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "sequence_file_0.json",
"size": 566,
"uuid": "6832e250-33c6-469a-b105-efa0d6f61309",
"version": "1"
},
{
"crc32c": "29B559F9",
"sha1": "0587673d7d88efa40a6f4d25d34d2eba14cf8e8b",
Expand All @@ -119,30 +11,6 @@
"size": 566,
"uuid": "c8a4ea32-6d66-48f3-b480-9421743b9c0a",
"version": "1"
},
{
"crc32c": "5B68EDBD",
"sha1": "189a903531d1b9fe78e035945ed29bc004636526",
"sha256": "87854958090c0868f38be4ac5c7aa2324ff027b2b4803a5a2fd9a8eb01af4f21",
"s3_etag": "2c0a2cf2cc963e8f01e85cc324763c4c",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "sequencing_protocol_0.json",
"size": 864,
"uuid": "86417b00-f90f-409d-a9dd-717d077c3cf4",
"version": "1"
},
{
"crc32c": "290F5DCB",
"sha1": "3d4837dacac05df3adb29fa05874a5841b4661f7",
"sha256": "62e38c88e4038b73e52abafeb08440ccb6eb4417c1f56fd3fc7bc10a2aec32ff",
"s3_etag": "9512af43df021d47adc83a34d49046b9",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "specimen_from_organism_0.json",
"size": 852,
"uuid": "169096e5-f310-4c67-9a70-e139a3576e9e",
"version": "1"
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,149 +1,5 @@
{
"manifest": [
{
"crc32c": "A78360AF",
"sha1": "e71327f467a43cca405794f5bf59856d78d93243",
"sha256": "f3c7f5fa20f3a57093de7e3d4afebfe0e79e0fc4199787f0527b83cbd548a462",
"s3_etag": "06621a64a5764cd05279e6de5c45940d",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "cell_suspension_0.json",
"size": 1084,
"uuid": "23e6c6f8-8bc4-4f18-81fd-1803e5628f6e",
"version": "1"
},
{
"crc32c": "D8097612",
"sha1": "f7eef7c5deab2669f7f19bc8b136564481dce1fa",
"sha256": "13949d71db41b28215ef855e41b63c16b88908eda26a24de405955f678125632",
"s3_etag": "8bf3ce1178c2204273571e1b4840ae25",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "collection_protocol_0.json",
"size": 698,
"uuid": "49c1ee68-51ea-476e-8432-c7419adb366b",
"version": "1"
},
{
"crc32c": "8A734C29",
"sha1": "94744afce8a76a828338a74cd1e0448b37ed9968",
"sha256": "d0a8f924a7861d9022a2199f106e35a643a0d1d0bb5d56f6d1201cd2890a850d",
"s3_etag": "ee2968d79156fdb69de1a6ef61af7fea",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "dissociation_protocol_0.json",
"size": 759,
"uuid": "35cf0ab5-c983-4da8-bb1c-c6504d74db8e",
"version": "1"
},
{
"crc32c": "477C14C4",
"sha1": "69e19c2322780bb6c31140214036e7286a66c022",
"sha256": "17a1c4f8a5f28028654a3e279d97695c6f3387a6ce20dd744381a2d8d4beae2b",
"s3_etag": "ee99d4e6d6da4ffdf0b321cdcf9d229d",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "donor_organism_0.json",
"size": 1129,
"uuid": "b7ef3cae-d65e-474d-ab6c-adbed59ed1ff",
"version": "1"
},
{
"crc32c": "FCDAF214",
"sha1": "23f4a2abf4456b2f2939645c7a1b65de59f460e6",
"sha256": "ba0c9eaf5361a1b3b8ec71f0050c01951cf756aff725b7bbe8ca5ec08c2e8305",
"s3_etag": "918e63ffedd853d5cb5418c89ec71f3c",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "enrichment_protocol_0.json",
"size": 948,
"uuid": "67f56a17-0a66-42c6-aac6-5aebe13aaf89",
"version": "1"
},
{
"crc32c": "20D5D16C",
"sha1": "5ce58077eed436a134c1a4baf4a838269e74e8bc",
"sha256": "0efe158e06b9d22ba10c72173ba3d175c5d33f3e0b1665a88cb49a1e90bc19f6",
"s3_etag": "fa5cd0c751962d1691218e5a99b02b14",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "enrichment_protocol_1.json",
"size": 1294,
"uuid": "5ee5c81d-0ad3-4351-bac5-f25f800492c9",
"version": "1"
},
{
"crc32c": "86BE78FD",
"sha1": "380c119013406240db0cd5521be3cefdcb25f860",
"sha256": "85aee1c90518a16173a03cc9b8ada0fbfda272ea302c6365a513cf50fad8f5e6",
"s3_etag": "bf171fb356c5cb9e4400794c1cb47f82",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "library_preparation_protocol_0.json",
"size": 1471,
"uuid": "5910c207-1093-4ef1-9537-dc614d128d14",
"version": "1"
},
{
"crc32c": "17049723",
"sha1": "df812b18c751efd55e21318be9411200f3066513",
"sha256": "4e82048876caa5f5def97e23c481e864d53cbd44e9692e4c75fd00ff3d51ecf6",
"s3_etag": "e3033356747551134f923f4ae17bc8f2",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_0.json",
"size": 461,
"uuid": "53e9b35d-0dca-49c8-b08b-e9dd90291107",
"version": "1"
},
{
"crc32c": "E8C13AED",
"sha1": "0c1fad482baa3f7de2e1e6dc189eca315ac5094e",
"sha256": "2218a054a130618a97023537f926f7642809b9c3b8f933ac8ab824e8d90f5852",
"s3_etag": "f3836ec07acacce1bf68b9471e9f8d53",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_1.json",
"size": 385,
"uuid": "42f32ec2-8e7a-41bc-9a22-0d35e4d7dac3",
"version": "1"
},
{
"crc32c": "455BBFEC",
"sha1": "99aa6318164b67ffd9834d8e063359d12d05dcc5",
"sha256": "4f4e89bb9d0b2c5939db92ab6eb57d53fe04852a444e2a8543011108bb41dafe",
"s3_etag": "f5132847fc461d27d1c1d7c242b4ad5b",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "process_2.json",
"size": 426,
"uuid": "f6d4ed60-9bba-41fc-a0cb-bcc3286cc00e",
"version": "1"
},
{
"crc32c": "25B4475B",
"sha1": "4feabdd41e56f1ea1f7b0dd77a572fe0d2a7658a",
"sha256": "068673c5372937bf982f7065ba62d012016a5c6bd20126d0ba6026eb0f6d9cb8",
"s3_etag": "14687d2a59cefa3d8ce35d70cddf4a46",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "project_0.json",
"size": 5017,
"uuid": "a6ba8ac5-e314-4cae-8afc-a2293e401748",
"version": "1"
},
{
"crc32c": "E0404A01",
"sha1": "2d85a788acb4a291d5d13fb918dfbb25d4205de9",
"sha256": "895c57e9ca2bccbcd306086fd691c55b8e9303a864f1791d9a924b599a3c5313",
"s3_etag": "2f991645e73f650f6ad740cea4202d04",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "sequence_file_0.json",
"size": 519,
"uuid": "358e8e9e-88c3-46d3-9e85-6ec3e714dd30",
"version": "1"
},
{
"crc32c": "E0404A01",
"sha1": "2d85a788acb4a291d5d13fb918dfbb25d4205de9",
Expand All @@ -155,30 +11,6 @@
"size": 519,
"uuid": "6ac13e04-d123-42de-bed9-f874b0d2fed2",
"version": "1"
},
{
"crc32c": "EDCA012A",
"sha1": "7af61784e4161bcf1ab7a5de9e36a8b1fdad8f24",
"sha256": "d6eef7948662074a17a9d6cd18f68f52f124f484707e8701d9d66efd1eb9fc62",
"s3_etag": "f110de13025c205035da128277811b0b",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "sequencing_protocol_0.json",
"size": 972,
"uuid": "d9744965-5fbc-4da2-8e6b-f1c991948a3c",
"version": "1"
},
{
"crc32c": "ECC85615",
"sha1": "c0630167c6cf904df7c4c1d6bbe073e0f2a03bce",
"sha256": "fe538370762296480db4c7ee3c440bbaa707c72aa4968e493b9c9b4ee427dc01",
"s3_etag": "a24f4b29940b6dd27f7b8cf724b2f078",
"content-type": "application/json; dcp-type=\"metadata/json\"",
"indexed": true,
"name": "specimen_from_organism_0.json",
"size": 1102,
"uuid": "564ed776-1edb-4b3e-b697-6af9174ed98a",
"version": "1"
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 898dda5

Please sign in to comment.