From 5b0e0fe0478d46e325cd2cd24e627c138eba9809 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Wed, 24 Jan 2024 16:42:14 -0800 Subject: [PATCH] [a r] Support for HCA bionetworks and atlases (#5519) --- lambdas/service/app.py | 2 +- lambdas/service/openapi.json | 491 +++++++++++++++++- src/azul/plugins/metadata/hca/__init__.py | 10 +- .../plugins/metadata/hca/indexer/aggregate.py | 3 +- .../plugins/metadata/hca/indexer/transform.py | 15 + .../plugins/metadata/hca/service/response.py | 3 + src/humancellatlas/data/metadata/api.py | 13 + ...8-658e-4c51-9de4-a768322f84c5.dss.hca.json | 27 + ...4-1075-4bbf-b96a-4d1ede0481b2.dss.hca.json | 9 + ...d.2018-11-02T11:33:44.698028Z.results.json | 31 ++ ...f-4b1c-4455-bd58-19c1a9e863b4.dss.hca.json | 3 +- test/service/data/pfb_manifest.results.json | 9 + test/service/data/pfb_manifest.schema.json | 45 ++ test/service/test_index_projects.py | 3 + test/service/test_response.py | 27 +- 15 files changed, 674 insertions(+), 17 deletions(-) diff --git a/lambdas/service/app.py b/lambdas/service/app.py index b5458fc60..ddc0ed818 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -228,7 +228,7 @@ # changes and reset the minor version to zero. Otherwise, increment only # the minor version for backwards compatible changes. A backwards # compatible change is one that does not require updates to clients. - 'version': '3.4' + 'version': '3.5' }, 'tags': [ { diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json index 2d0a57c62..64253883b 100644 --- a/lambdas/service/openapi.json +++ b/lambdas/service/openapi.json @@ -3,7 +3,7 @@ "info": { "title": "azul_service", "description": "\n# Overview\n\nAzul is a REST web service for querying metadata associated with\nboth experimental and analysis data from a data repository. In order\nto deliver response times that make it suitable for interactive use\ncases, the set of metadata properties that it exposes for sorting,\nfiltering, and aggregation is limited. Azul provides a uniform view\nof the metadata over a range of diverse schemas, effectively\nshielding clients from changes in the schemas as they occur over\ntime. It does so, however, at the expense of detail in the set of\nmetadata properties it exposes and in the accuracy with which it\naggregates them.\n\nAzul denormalizes and aggregates metadata into several different\nindices for selected entity types. Metadata entities can be queried\nusing the [Index](#operations-tag-Index) endpoints.\n\nA set of indices forms a catalog. There is a default catalog called\n`dcp2` which will be used unless a\ndifferent catalog name is specified using the `catalog` query\nparameter. Metadata from different catalogs is completely\nindependent: a response obtained by querying one catalog does not\nnecessarily correlate to a response obtained by querying another\none. Two catalogs can contain metadata from the same sources or\ndifferent sources. It is only guaranteed that the body of a\nresponse by any given endpoint adheres to one schema,\nindependently of which catalog was specified in the request.\n\nAzul provides the ability to download data and metadata via the\n[Manifests](#operations-tag-Manifests) endpoints. The\n`curl` format manifests can be used to\ndownload data files. Other formats provide various views of the\nmetadata. Manifests can be generated for a selection of files using\nfilters. These filters are interchangeable with the filters used by\nthe [Index](#operations-tag-Index) endpoints.\n\nAzul also provides a [summary](#operations-Index-get_index_summary)\nview of indexed data.\n\n## Data model\n\nAny index, when queried, returns a JSON array of hits. Each hit\nrepresents a metadata entity. Nested in each hit is a summary of the\nproperties of entities associated with the hit. An entity is\nassociated either by a direct edge in the original metadata graph,\nor indirectly as a series of edges. The nested properties are\ngrouped by the type of the associated entity. The properties of all\ndata files associated with a particular sample, for example, are\nlisted under `hits[*].files` in a `/index/samples` response. It is\nimportant to note that while each _hit_ represents a discrete\nentity, the properties nested within that hit are the result of an\naggregation over potentially many associated entities.\n\nTo illustrate this, consider a data file that is part of two\nprojects (a project is a group of related experiments, typically by\none laboratory, institution or consortium). Querying the `files`\nindex for this file yields a hit looking something like:\n\n```\n{\n \"projects\": [\n {\n \"projectTitle\": \"Project One\"\n \"laboratory\": ...,\n ...\n },\n {\n \"projectTitle\": \"Project Two\"\n \"laboratory\": ...,\n ...\n }\n ],\n \"files\": [\n {\n \"format\": \"pdf\",\n \"name\": \"Team description.pdf\",\n ...\n }\n ]\n}\n```\n\nThis example hit contains two kinds of nested entities (a hit in an\nactual Azul response will contain more): There are the two projects\nentities, and the file itself. These nested entities contain\nselected metadata properties extracted in a consistent way. This\nmakes filtering and sorting simple.\n\nAlso notice that there is only one file. When querying a particular\nindex, the corresponding entity will always be a singleton like\nthis.\n", - "version": "3.4" + "version": "3.5" }, "tags": [ { @@ -1017,6 +1017,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -1470,6 +1486,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -2076,6 +2108,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -2123,7 +2181,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "entity_type", @@ -2165,6 +2223,7 @@ "aggregateUpdateDate", "assayType", "biologicalSex", + "bionetworkName", "bundleUuid", "bundleVersion", "cellCount", @@ -2187,6 +2246,7 @@ "institution", "instrumentManufacturerModel", "isIntermediate", + "isTissueAtlasProject", "laboratory", "lastModifiedDate", "libraryConstructionApproach", @@ -2216,6 +2276,7 @@ "specimenOrgan", "specimenOrganPart", "submissionDate", + "tissueAtlas", "updateDate", "workflow" ] @@ -2412,6 +2473,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -2865,6 +2942,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -3471,6 +3564,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -3518,7 +3637,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "entity_type", @@ -3560,6 +3679,7 @@ "aggregateUpdateDate", "assayType", "biologicalSex", + "bionetworkName", "bundleUuid", "bundleVersion", "cellCount", @@ -3582,6 +3702,7 @@ "institution", "instrumentManufacturerModel", "isIntermediate", + "isTissueAtlasProject", "laboratory", "lastModifiedDate", "libraryConstructionApproach", @@ -3611,6 +3732,7 @@ "specimenOrgan", "specimenOrganPart", "submissionDate", + "tissueAtlas", "updateDate", "workflow" ] @@ -3922,6 +4044,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -4375,6 +4513,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -4981,6 +5135,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -5028,7 +5208,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "entity_type", @@ -5070,6 +5250,7 @@ "aggregateUpdateDate", "assayType", "biologicalSex", + "bionetworkName", "bundleUuid", "bundleVersion", "cellCount", @@ -5092,6 +5273,7 @@ "institution", "instrumentManufacturerModel", "isIntermediate", + "isTissueAtlasProject", "laboratory", "lastModifiedDate", "libraryConstructionApproach", @@ -5121,6 +5303,7 @@ "specimenOrgan", "specimenOrganPart", "submissionDate", + "tissueAtlas", "updateDate", "workflow" ] @@ -5437,6 +5620,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -5890,6 +6089,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -6496,6 +6711,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -6543,7 +6784,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" } ] }, @@ -6757,6 +6998,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -7210,6 +7467,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -7816,6 +8089,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -7863,7 +8162,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" } ] } @@ -8004,6 +8303,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -8457,6 +8772,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -9063,6 +9394,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -9110,7 +9467,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "format", @@ -9355,6 +9712,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -9808,6 +10181,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -10414,6 +10803,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -10461,7 +10876,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "format", @@ -10662,6 +11077,22 @@ ], "additionalProperties": false }, + "bionetworkName": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "string", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "bundleUuid": { "type": "object", "properties": { @@ -11115,6 +11546,22 @@ ], "additionalProperties": false }, + "isTissueAtlasProject": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "boolean", + "nullable": true + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "laboratory": { "type": "object", "properties": { @@ -11721,6 +12168,32 @@ ], "additionalProperties": false }, + "tissueAtlas": { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": { + "type": "object", + "properties": { + "atlas": { + "type": "string", + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, "updateDate": { "type": "object", "properties": { @@ -11768,7 +12241,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, updateDate, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow\n" }, { "name": "format", diff --git a/src/azul/plugins/metadata/hca/__init__.py b/src/azul/plugins/metadata/hca/__init__.py index 09697c786..88d068bc6 100644 --- a/src/azul/plugins/metadata/hca/__init__.py +++ b/src/azul/plugins/metadata/hca/__init__.py @@ -203,7 +203,10 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'project_title': 'projectTitle', 'publication_titles': 'publicationTitle', 'accessions': 'accessions', - 'estimated_cell_count': 'projectEstimatedCellCount' + 'estimated_cell_count': 'projectEstimatedCellCount', + 'is_tissue_atlas_project': 'isTissueAtlasProject', + 'tissue_atlas': 'tissueAtlas', + 'bionetwork_name': 'bionetworkName' }, 'sequencing_protocols': { 'instrument_manufacturer_model': 'instrumentManufacturerModel', @@ -297,7 +300,10 @@ def facets(self) -> Sequence[str]: 'projectDescription', 'institution', 'contactName', - 'publicationTitle' + 'publicationTitle', + 'isTissueAtlasProject', + 'tissueAtlas', + 'bionetworkName' ] @property diff --git a/src/azul/plugins/metadata/hca/indexer/aggregate.py b/src/azul/plugins/metadata/hca/indexer/aggregate.py index ca1cdd0c6..354252431 100644 --- a/src/azul/plugins/metadata/hca/indexer/aggregate.py +++ b/src/azul/plugins/metadata/hca/indexer/aggregate.py @@ -179,7 +179,8 @@ def _accumulator(self, field) -> Optional[Accumulator]: 'contact_names', 'contributors', 'publications', - 'accessions'): + 'accessions', + 'tissue_atlas'): return None elif field == 'estimated_cell_count': return MaxAccumulator() diff --git a/src/azul/plugins/metadata/hca/indexer/transform.py b/src/azul/plugins/metadata/hca/indexer/transform.py index 32e6aa72c..4b58daa5c 100644 --- a/src/azul/plugins/metadata/hca/indexer/transform.py +++ b/src/azul/plugins/metadata/hca/indexer/transform.py @@ -293,6 +293,8 @@ def api_schema(self) -> JSON: accession: Nested = Nested(namespace=null_str, accession=null_str) +tissue_atlas: Nested = Nested(atlas=null_str, version=null_str) + age_range = ClosedRange(pass_thru_float) @@ -658,6 +660,12 @@ def _accession(self, p: api.Accession): 'accession': p.accession } + def _tissue_atlas(self, b: api.Bionetwork): + return { + 'atlas': b.hca_tissue_atlas, + 'version': b.hca_tissue_atlas_version + } + @classmethod def _project_types(cls) -> FieldTypes: return { @@ -674,6 +682,9 @@ def _project_types(cls) -> FieldTypes: 'supplementary_links': [null_str], '_type': null_str, 'accessions': [accession], + 'is_tissue_atlas_project': null_bool, + 'tissue_atlas': [tissue_atlas], + 'bionetwork_name': [null_str], 'estimated_cell_count': null_int } @@ -717,6 +728,10 @@ def _project(self, project: api.Project) -> MutableJSON: 'supplementary_links': sorted(project.supplementary_links), '_type': 'project', 'accessions': list(map(self._accession, project.accessions)), + 'is_tissue_atlas_project': any(bionetwork.atlas_project + for bionetwork in project.bionetworks), + 'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)), + 'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks), 'estimated_cell_count': project.estimated_cell_count } diff --git a/src/azul/plugins/metadata/hca/service/response.py b/src/azul/plugins/metadata/hca/service/response.py index 0d4332840..43d16a99c 100644 --- a/src/azul/plugins/metadata/hca/service/response.py +++ b/src/azul/plugins/metadata/hca/service/response.py @@ -365,6 +365,9 @@ def make_projects(self, entry) -> MutableJSONs: 'projectShortname': project['project_short_name'], 'laboratory': sorted(set(project.get('laboratory', [None]))), 'estimatedCellCount': project['estimated_cell_count'], + 'isTissueAtlasProject': project['is_tissue_atlas_project'], + 'tissueAtlas': project.get('tissue_atlas'), + 'bionetworkName': project['bionetwork_name'] } if self.entity_type == 'projects': translated_project['projectDescription'] = project.get('project_description', []) diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py index 30314ca6c..39c4e7a77 100644 --- a/src/humancellatlas/data/metadata/api.py +++ b/src/humancellatlas/data/metadata/api.py @@ -306,6 +306,15 @@ class Accession: accession: str +@dataclass(eq=True, frozen=True) +class Bionetwork: + name: str + atlas_project: Optional[bool] = None + hca_tissue_atlas: Optional[str] = None + hca_tissue_atlas_version: Optional[str] = None + schema_version: Optional[str] = None + + @dataclass(init=False) class Project(Entity): project_short_name: str @@ -316,6 +325,7 @@ class Project(Entity): accessions: OrderedSet[Accession] supplementary_links: OrderedSet[str] estimated_cell_count: Optional[int] + bionetworks: OrderedSet[Bionetwork] def __init__(self, json: JSON, @@ -341,6 +351,9 @@ def __init__(self, assert isinstance(value, list) accessions.update(Accession(namespace=prefix, accession=v) for v in value) self.accessions = accessions + self.bionetworks = OrderedSet(Bionetwork(**bionetwork) + for bionetwork in content.get('hca_bionetworks', ()) + if bionetwork) def _accessions(self, namespace: str) -> Set[str]: return {a.accession for a in self.accessions if a.namespace == namespace} diff --git a/test/indexer/data/2c7d06b8-658e-4c51-9de4-a768322f84c5.dss.hca.json b/test/indexer/data/2c7d06b8-658e-4c51-9de4-a768322f84c5.dss.hca.json index ebc5c9fef..3eb746590 100644 --- a/test/indexer/data/2c7d06b8-658e-4c51-9de4-a768322f84c5.dss.hca.json +++ b/test/indexer/data/2c7d06b8-658e-4c51-9de4-a768322f84c5.dss.hca.json @@ -685,6 +685,33 @@ "grant_id": "ZIC DE000729-09", "organization": "NIDCR Combined Technical Research Core" } + ], + "hca_bionetworks": [ + { + "name": "Kidney" + }, + { + "name": "Skin", + "atlas_project": true + }, + { + "name": "Lung", + "atlas_project": false, + "hca_tissue_atlas": "Lung" + }, + { + "name": "Eye", + "atlas_project": false, + "hca_tissue_atlas": "Retina", + "hca_tissue_atlas_version": "v1.0" + }, + { + "name": "Immune", + "atlas_project": false, + "hca_tissue_atlas": "Blood", + "hca_tissue_atlas_version": "v1.0", + "schema_version": "1.0.1" + } ], "provenance": { "document_id": "50151324-f3ed-4358-98af-ec352a940a61", diff --git a/test/indexer/data/587d74b4-1075-4bbf-b96a-4d1ede0481b2.dss.hca.json b/test/indexer/data/587d74b4-1075-4bbf-b96a-4d1ede0481b2.dss.hca.json index 7ad0b6672..abf7dd5f9 100644 --- a/test/indexer/data/587d74b4-1075-4bbf-b96a-4d1ede0481b2.dss.hca.json +++ b/test/indexer/data/587d74b4-1075-4bbf-b96a-4d1ede0481b2.dss.hca.json @@ -1444,6 +1444,15 @@ "institution": "Fake Institution" } ], + "hca_bionetworks": [ + { + "name": "Immune", + "atlas_project": false, + "hca_tissue_atlas": "Blood", + "hca_tissue_atlas_version": "v1.0", + "schema_version": "1.0.1" + } + ], "provenance": { "document_id": "6615efae-fca8-4dd2-a223-9cfcf30fe94d", "submission_date": "2018-10-10T02:23:39.569Z", diff --git a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json index 8d99b4c9b..e3b701964 100644 --- a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json +++ b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json @@ -243,6 +243,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -488,6 +491,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -728,6 +734,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -993,6 +1002,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -1258,6 +1270,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -1573,6 +1588,8 @@ "_type": [ "project" ], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": [0], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -1873,6 +1890,8 @@ "_type": [ "project" ], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": [0], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -2171,6 +2190,8 @@ "_type": [ "project" ], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": [0], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -2429,6 +2450,8 @@ "_type": [ "project" ], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": [0], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -2751,6 +2774,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -3025,6 +3051,9 @@ ], "_type": "project", "accessions": [], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": 0, + "tissue_atlas": [], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } @@ -3302,6 +3331,8 @@ "_type": [ "project" ], + "bionetwork_name": ["~null"], + "is_tissue_atlas_project": [0], "estimated_cell_count": 9223372036854774784, "estimated_cell_count_": null } diff --git a/test/indexer/data/ffac201f-4b1c-4455-bd58-19c1a9e863b4.dss.hca.json b/test/indexer/data/ffac201f-4b1c-4455-bd58-19c1a9e863b4.dss.hca.json index 53f58c963..cc54819b5 100644 --- a/test/indexer/data/ffac201f-4b1c-4455-bd58-19c1a9e863b4.dss.hca.json +++ b/test/indexer/data/ffac201f-4b1c-4455-bd58-19c1a9e863b4.dss.hca.json @@ -776,7 +776,8 @@ "update_date": "2019-10-09T15:32:48.934Z", "schema_major_version": 14, "schema_minor_version": 1 - } + }, + "hca_bionetworks": [] }, "library_preparation_protocol_0.json": { "describedBy": "https://schema.humancellatlas.org/type/protocol/sequencing/6.2.0/library_preparation_protocol", diff --git a/test/service/data/pfb_manifest.results.json b/test/service/data/pfb_manifest.results.json index eaff01d82..4a36bb7a5 100644 --- a/test/service/data/pfb_manifest.results.json +++ b/test/service/data/pfb_manifest.results.json @@ -528,6 +528,9 @@ null ], "publications": [], + "is_tissue_atlas_project": [false], + "bionetwork_name": ["Immune"], + "tissue_atlas": [], "supplementary_links": [ null ] @@ -864,6 +867,9 @@ "Single-Cell Analysis of Human Pancreas Reveals Transcriptional Signatures of Aging and Somatic Mutation Patterns." ], "publications": [], + "is_tissue_atlas_project": [false], + "bionetwork_name": [null], + "tissue_atlas": [], "supplementary_links": [ "https://www.ebi.ac.uk/gxa/sc/experiments/E-GEOD-81547/Results" ] @@ -3273,6 +3279,9 @@ "The cellular immune response to COVID-19 deciphered by single cell multi-omics across three UK centres" ], "publications": [], + "is_tissue_atlas_project": [false], + "bionetwork_name": [null], + "tissue_atlas": [], "supplementary_links": [ null ] diff --git a/test/service/data/pfb_manifest.schema.json b/test/service/data/pfb_manifest.schema.json index 054fe4269..520a94596 100644 --- a/test/service/data/pfb_manifest.schema.json +++ b/test/service/data/pfb_manifest.schema.json @@ -1936,6 +1936,51 @@ "type": "array" } }, + { + "name": "is_tissue_atlas_project", + "namespace": "projects", + "type": { + "items": ["null", "boolean"], + "type": "array" + } + }, + { + "name": "tissue_atlas", + "namespace": "projects", + "type": { + "items": { + "fields": [ + { + "name": "atlas", + "namespace": "projects.tissue_atlas", + "type": { + "items": ["null", "string"], + "type": "array" + } + }, + { + "name": "version", + "namespace": "projects.tissue_atlas", + "type": { + "items": ["null", "string"], + "type": "array" + } + } + ], + "name": "projects.tissue_atlas", + "type": "record" + }, + "type": "array" + } + }, + { + "name": "bionetwork_name", + "namespace": "projects", + "type": { + "items": ["null", "string"], + "type": "array" + } + }, { "name": "estimated_cell_count", "namespace": "projects", diff --git a/test/service/test_index_projects.py b/test/service/test_index_projects.py index c489554c2..1f40b5eb2 100644 --- a/test/service/test_index_projects.py +++ b/test/service/test_index_projects.py @@ -81,6 +81,9 @@ def assert_file_type_summaries(hit): 'matrices', 'contributedAnalyses', 'accessions', + 'tissueAtlas', + 'isTissueAtlasProject', + 'bionetworkName', 'estimatedCellCount' } response_json = get_response_json() diff --git a/test/service/test_response.py b/test/service/test_response.py index a628cd7d8..68d44679f 100644 --- a/test/service/test_response.py +++ b/test/service/test_response.py @@ -253,6 +253,9 @@ def test_response_factory_files(self): "projectId": ["e8642221-4c2c-4fd7-b926-a68bce363c88"], "projectShortname": ["Single of human pancreas"], "projectTitle": ["Single cell transcriptome patterns."], + "bionetworkName": [None], + "isTissueAtlasProject": [False], + "tissueAtlas": None, "estimatedCellCount": None, } ], @@ -590,7 +593,10 @@ def test_response_factory_projects(self): "estimatedCellCount": None, "matrices": {}, "contributedAnalyses": {}, - "accessions": [], + "bionetworkName": [None], + "tissueAtlas": [], + "isTissueAtlasProject": False, + "accessions": [] } ], "protocols": [ @@ -846,6 +852,9 @@ def test_response_factory_projects_accessions(self): "estimatedCellCount": None, "matrices": {}, "contributedAnalyses": {}, + "isTissueAtlasProject": False, + "bionetworkName": [None], + "tissueAtlas": [], "accessions": [ {"namespace": "array_express", "accession": "E-AAAA-00"}, {"namespace": "geo_series", "accession": "GSE00000"}, @@ -3432,7 +3441,8 @@ def test_summary_filter_none(self): def test_projects_response(self): """ - Verify a project's contributors, laboratory, and publications. + Verify a project's contributors, laboratory, bionetworks, + and publications. """ params = { 'catalog': self.catalog, @@ -3489,6 +3499,17 @@ def test_projects_response(self): } ] self.assertEqual(expected_publications, project['publications']) + expected_tissue_atlas = [ + {'atlas': None, 'version': None}, + {'atlas': None, 'version': None}, + {'atlas': 'Lung', 'version': None}, + {'atlas': 'Retina', 'version': 'v1.0'}, + {'atlas': 'Blood', 'version': 'v1.0'}, + ] + expected_bionetwork_name = ['Eye', 'Immune', 'Kidney', 'Lung', 'Skin'] + self.assertEqual(expected_tissue_atlas, project['tissueAtlas']) + self.assertEqual(expected_bionetwork_name, project['bionetworkName']) + self.assertTrue(project['isTissueAtlasProject']) class TestUnpopulatedIndexResponse(IndexResponseTestCase): @@ -3564,7 +3585,7 @@ def test_sorted_responses(self): sortable_fields = { field for field in self.field_mapping - if field not in {'assayType', 'organismAgeRange', 'accessions'} + if field not in {'assayType', 'organismAgeRange', 'accessions', 'tissueAtlas'} } for entity_type, field in product(self.entity_types(), sortable_fields):