From 4d7a991a75989fc8c8ed6f5705aefb5356091c93 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Tue, 1 Aug 2023 16:18:50 -0700 Subject: [PATCH 1/3] Normalize doublequotes in avro_pfb.py --- src/azul/service/avro_pfb.py | 282 +++++++++++++++++------------------ 1 file changed, 141 insertions(+), 141 deletions(-) diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py index 84e8942076..07911c461d 100644 --- a/src/azul/service/avro_pfb.py +++ b/src/azul/service/avro_pfb.py @@ -283,23 +283,23 @@ def pfb_metadata_entity(field_types: FieldTypes): Unfortunately Terra does not display the relations between the nodes. """ return { - "id": None, - "name": "Metadata", - "object": { - "nodes": [ + 'id': None, + 'name': 'Metadata', + 'object': { + 'nodes': [ { - "name": field_type, - "ontology_reference": "", - "values": {}, - "links": [] if field_type == 'files' else [{ - "multiplicity": "MANY_TO_MANY", - "dst": "files", - "name": "files" + 'name': field_type, + 'ontology_reference': '', + 'values': {}, + 'links': [] if field_type == 'files' else [{ + 'multiplicity': 'MANY_TO_MANY', + 'dst': 'files', + 'name': 'files' }], - "properties": [] + 'properties': [] } for field_type in field_types ], - "misc": {} + 'misc': {} } } @@ -308,10 +308,10 @@ def pfb_schema_from_field_types(field_types: FieldTypes) -> JSON: field_types = _inject_reference_handover_columns(field_types) entity_schemas = ( { - "name": entity_type, - "namespace": "", - "type": "record", - "fields": list(_entity_schema_recursive(field_type, entity_type)) + 'name': entity_type, + 'namespace': '', + 'type': 'record', + 'fields': list(_entity_schema_recursive(field_type, entity_type)) } for entity_type, field_type in field_types.items() # We skip primitive top-level fields like total_estimated_cells @@ -333,102 +333,102 @@ def _avro_pfb_schema(azul_avro_schema: Iterable[JSON]) -> JSON: :return: The complete and valid Avro schema """ return { - "type": "record", - "name": "Entity", - "fields": [ + 'type': 'record', + 'name': 'Entity', + 'fields': [ { - "name": "id", - "type": ["null", "string"], - "default": None + 'name': 'id', + 'type': ['null', 'string'], + 'default': None }, { - "name": "name", - "type": "string" + 'name': 'name', + 'type': 'string' }, { - "name": "object", - "type": [ + 'name': 'object', + 'type': [ { - "type": "record", - "name": "Metadata", - "fields": [ + 'type': 'record', + 'name': 'Metadata', + 'fields': [ { - "name": "nodes", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Node", - "fields": [ + 'name': 'nodes', + 'type': { + 'type': 'array', + 'items': { + 'type': 'record', + 'name': 'Node', + 'fields': [ { - "name": "name", - "type": "string" + 'name': 'name', + 'type': 'string' }, { - "name": "ontology_reference", - "type": "string", + 'name': 'ontology_reference', + 'type': 'string', }, { - "name": "values", - "type": { - "type": "map", - "values": "string", + 'name': 'values', + 'type': { + 'type': 'map', + 'values': 'string', }, }, { - "name": "links", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Link", - "fields": [ + 'name': 'links', + 'type': { + 'type': 'array', + 'items': { + 'type': 'record', + 'name': 'Link', + 'fields': [ { - "name": "multiplicity", - "type": { - "type": "enum", - "name": "Multiplicity", - "symbols": [ - "ONE_TO_ONE", - "ONE_TO_MANY", - "MANY_TO_ONE", - "MANY_TO_MANY", + 'name': 'multiplicity', + 'type': { + 'type': 'enum', + 'name': 'Multiplicity', + 'symbols': [ + 'ONE_TO_ONE', + 'ONE_TO_MANY', + 'MANY_TO_ONE', + 'MANY_TO_MANY', ], }, }, { - "name": "dst", - "type": "string", + 'name': 'dst', + 'type': 'string', }, { - "name": "name", - "type": "string", + 'name': 'name', + 'type': 'string', }, ], }, }, }, { - "name": "properties", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Property", - "fields": [ + 'name': 'properties', + 'type': { + 'type': 'array', + 'items': { + 'type': 'record', + 'name': 'Property', + 'fields': [ { - "name": "name", - "type": "string", + 'name': 'name', + 'type': 'string', }, { - "name": "ontology_reference", - "type": "string", + 'name': 'ontology_reference', + 'type': 'string', }, { - "name": "values", - "type": { - "type": "map", - "values": "string", + 'name': 'values', + 'type': { + 'type': 'map', + 'values': 'string', }, }, ], @@ -440,10 +440,10 @@ def _avro_pfb_schema(azul_avro_schema: Iterable[JSON]) -> JSON: }, }, { - "name": "misc", - "type": { - "type": "map", - "values": "string" + 'name': 'misc', + 'type': { + 'type': 'map', + 'values': 'string' }, }, ], @@ -452,25 +452,25 @@ def _avro_pfb_schema(azul_avro_schema: Iterable[JSON]) -> JSON: ] }, { - "name": "relations", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Relation", - "fields": [ + 'name': 'relations', + 'type': { + 'type': 'array', + 'items': { + 'type': 'record', + 'name': 'Relation', + 'fields': [ { - "name": "dst_id", - "type": "string" + 'name': 'dst_id', + 'type': 'string' }, { - "name": "dst_name", - "type": "string" + 'name': 'dst_name', + 'type': 'string' }, ], }, }, - "default": [], + 'default': [], }, ], } @@ -531,16 +531,16 @@ def _entity_schema_recursive(field_types: FieldTypes, if isinstance(field_type, dict): yield { - "name": field_name, - "namespace": namespace, - "type": { + 'name': field_name, + 'namespace': namespace, + 'type': { # This is always an array, even if singleton is passed in - "type": "array", - "items": { - "name": field_name, - "namespace": namespace, - "type": "record", - "fields": list(_entity_schema_recursive(field_type, *path, field_name)) + 'type': 'array', + 'items': { + 'name': field_name, + 'namespace': namespace, + 'type': 'record', + 'fields': list(_entity_schema_recursive(field_type, *path, field_name)) } } } @@ -557,37 +557,37 @@ def _entity_schema_recursive(field_types: FieldTypes, # https://github.com/DataBiosphere/azul/issues/4094 if path[0] == 'files' and not plural or field_name in exceptions: yield { - "name": field_name, - "namespace": namespace, - "type": _nullable_to_pfb_types[field_type], + 'name': field_name, + 'namespace': namespace, + 'type': _nullable_to_pfb_types[field_type], } else: yield { - "name": field_name, - "namespace": namespace, - "type": { - "type": "array", - "items": _nullable_to_pfb_types[field_type], + 'name': field_name, + 'namespace': namespace, + 'type': { + 'type': 'array', + 'items': _nullable_to_pfb_types[field_type], } } elif field_type is pass_thru_uuid4: yield { - "name": field_name, - "namespace": namespace, - "type": ["string"], - "logicalType": "UUID" + 'name': field_name, + 'namespace': namespace, + 'type': ['string'], + 'logicalType': 'UUID' } elif isinstance(field_type, ClosedRange): yield { - "name": field_name, - "namespace": namespace, - "type": { - "type": "array", - "items": { - "type": "array", - "items": { - int: "long", - float: "double" + 'name': field_name, + 'namespace': namespace, + 'type': { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + int: 'long', + float: 'double' }[field_type.ends_type.native_type] } } @@ -597,29 +597,29 @@ def _entity_schema_recursive(field_types: FieldTypes, # https://github.com/DataBiosphere/azul/issues/4094 elif field_type is value_and_unit: yield { - "name": field_name, - "namespace": namespace, - "type": { - "name": field_name, - "namespace": namespace, - "type": "array", - "items": [ + 'name': field_name, + 'namespace': namespace, + 'type': { + 'name': field_name, + 'namespace': namespace, + 'type': 'array', + 'items': [ # FIXME: Change 'string' to 'null' # https://github.com/DataBiosphere/azul/issues/2462 - "string", + 'string', { # FIXME: Why do we need to repeat `name` and `namespace` # with the same values at three different depths? # https://github.com/DataBiosphere/azul/issues/4094 - "name": field_name, - "namespace": namespace, - "type": "record", - "fields": [ + 'name': field_name, + 'namespace': namespace, + 'type': 'record', + 'fields': [ { - "name": name, - "namespace": namespace + '.' + field_name, + 'name': name, + 'namespace': namespace + '.' + field_name, # Although, not technically a null_str, it's effectively the same - "type": _nullable_to_pfb_types[null_str] + 'type': _nullable_to_pfb_types[null_str] } for name in ('value', 'unit') ] From e6626e11dea9c493ff8185efcf2d9a5f66a960c4 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Tue, 1 Aug 2023 16:23:53 -0700 Subject: [PATCH 2/3] Fix formatting and return type annotations --- src/azul/service/avro_pfb.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py index 07911c461d..f08aea2c23 100644 --- a/src/azul/service/avro_pfb.py +++ b/src/azul/service/avro_pfb.py @@ -151,7 +151,7 @@ def entities(self) -> Iterable[JSON]: yield entity.to_json(sorted(relations, key=attrgetter('dst_name', 'dst_id'))) -def _reversible_join(joiner: str, parts: Iterable[str]): +def _reversible_join(joiner: str, parts: Iterable[str]) -> str: parts = list(parts) reject(any(joiner in part for part in parts), parts) return joiner.join(parts) @@ -175,7 +175,8 @@ def __attrs_post_init__(self): def from_json(cls, name: str, object_: MutableJSON, - schema: JSON) -> 'PFBEntity': + schema: JSON + ) -> 'PFBEntity': """ Derive ID from object in a reproducible way so that we can distinguish entities by comparing their IDs. @@ -254,7 +255,7 @@ def _replace_null_with_empty_string(cls, object_json: AnyJSON) -> AnyMutableJSON else: return object_json - def to_json(self, relations: Iterable['PFBRelation']): + def to_json(self, relations: Iterable['PFBRelation']) -> JSON: return { 'id': self.id, 'name': self.name, @@ -272,7 +273,7 @@ class PFBRelation: dst_name: str @classmethod - def to_entity(cls, entity: PFBEntity): + def to_entity(cls, entity: PFBEntity) -> 'PFBRelation': return cls(dst_id=entity.id, dst_name=entity.name) @@ -510,7 +511,8 @@ def _inject_reference_handover_values(entity: MutableJSON, doc: JSON): def _entity_schema_recursive(field_types: FieldTypes, - *path: str) -> Iterable[JSON]: + *path: str + ) -> Iterable[JSON]: for field_name, field_type in field_types.items(): namespace = '.'.join(path) plural = isinstance(field_type, list) From 0f4b45f32db271f2b0e83a00e988449257bb3b28 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Tue, 1 Aug 2023 16:15:03 -0700 Subject: [PATCH 3/3] Allow null values in PFBs (#2462) --- src/azul/service/avro_pfb.py | 47 +-- test/service/data/pfb_manifest.results.json | 438 ++++++++++---------- test/service/data/pfb_manifest.schema.json | 183 +++++++- test/service/test_pfb.py | 1 + 4 files changed, 389 insertions(+), 280 deletions(-) diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py index f08aea2c23..e26c93c589 100644 --- a/src/azul/service/avro_pfb.py +++ b/src/azul/service/avro_pfb.py @@ -57,8 +57,6 @@ value_and_unit, ) from azul.types import ( - AnyJSON, - AnyMutableJSON, JSON, MutableJSON, ) @@ -182,7 +180,6 @@ def from_json(cls, entities by comparing their IDs. """ cls._add_missing_fields(name, object_, schema) - object_ = cls._replace_null_with_empty_string(object_) ids = object_['document_id'] # document_id is an array unless the inner entity type matches the # outer entity type @@ -195,9 +192,7 @@ def from_json(cls, def _add_missing_fields(cls, name: str, object_: MutableJSON, schema): """ Compare entities against the schema and add any fields that are missing. - - None is the default value, but because of https://github.com/DataBiosphere/azul/issues/2370 - this isn't currently reflected in the schema. + None is the default value. """ if schema['type'] == 'record': object_schema = one(f for f in schema['fields'] if f['name'] == 'object') @@ -210,18 +205,14 @@ def _add_missing_fields(cls, name: str, object_: MutableJSON, schema): field_name, field_type = field['name'], field['type'] if field_name not in object_: if isinstance(field_type, list): - # FIXME: Change 'string' to 'null' - # https://github.com/DataBiosphere/azul/issues/2462 - assert 'string' in field_type or 'null' in field_type, field + assert 'null' in field_type, field default_value = None elif field_type['type'] == 'array': if isinstance(field_type['items'], dict): assert field_type['items']['type'] in ('record', 'array'), field default_value = [] else: - # FIXME: Change 'string' to 'null' - # https://github.com/DataBiosphere/azul/issues/2462 - assert 'string' in field_type['items'], field + assert 'null' in field_type['items'], field default_value = [None] else: assert False, field @@ -237,24 +228,6 @@ def _add_missing_fields(cls, name: str, object_: MutableJSON, schema): object_=sub_object, schema=field) - @classmethod - def _replace_null_with_empty_string(cls, object_json: AnyJSON) -> AnyMutableJSON: - # FIXME: remove with https://github.com/DataBiosphere/azul/issues/2462 - if object_json is None: - return '' - elif isinstance(object_json, dict): - return { - k: cls._replace_null_with_empty_string(v) - for k, v in object_json.items() - } - elif isinstance(object_json, list): - return [ - cls._replace_null_with_empty_string(item) - for item in object_json - ] - else: - return object_json - def to_json(self, relations: Iterable['PFBRelation']) -> JSON: return { 'id': self.id, @@ -502,11 +475,11 @@ def _inject_reference_handover_values(entity: MutableJSON, doc: JSON): # https://github.com/DataBiosphere/azul/issues/4094 _nullable_to_pfb_types = { - null_bool: ['string', 'boolean'], - null_float: ['string', 'double'], - null_int: ['string', 'long'], - null_str: ['string'], - null_datetime: ['string'], + null_bool: ['null', 'boolean'], + null_float: ['null', 'double'], + null_int: ['null', 'long'], + null_str: ['null', 'string'], + null_datetime: ['null', 'string'], } @@ -606,9 +579,7 @@ def _entity_schema_recursive(field_types: FieldTypes, 'namespace': namespace, 'type': 'array', 'items': [ - # FIXME: Change 'string' to 'null' - # https://github.com/DataBiosphere/azul/issues/2462 - 'string', + 'null', { # FIXME: Why do we need to repeat `name` and `namespace` # with the same values at three different depths? diff --git a/test/service/data/pfb_manifest.results.json b/test/service/data/pfb_manifest.results.json index 5bec14f440..eaff01d826 100644 --- a/test/service/data/pfb_manifest.results.json +++ b/test/service/data/pfb_manifest.results.json @@ -296,7 +296,7 @@ "b5894cf5-ecdc-4ea6-a0b9-5335ab678c7a" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "brain" @@ -305,10 +305,10 @@ "temporal lobe" ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -330,10 +330,10 @@ "specimens" ], "model_organ": [ - "" + null ], "model_organ_part": [ - "" + null ], "organ": [ "brain" @@ -380,7 +380,7 @@ "b5894cf5-ecdc-4ea6-a0b9-5335ab678c7a" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "brain" @@ -389,10 +389,10 @@ "temporal lobe" ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -414,10 +414,10 @@ "temporal lobe" ], "selected_cell_type": [ - "" + null ], "total_estimated_cells_redundant": 0, - "total_estimated_cells": "" + "total_estimated_cells": null }, "relations": [] }, @@ -435,7 +435,7 @@ "adult" ], "diseases": [ - "" + null ], "document_id": [ "242e38d2-c975-47ee-800a-6645b47e92d2" @@ -445,7 +445,7 @@ "Homo sapiens" ], "organism_age": [ - "" + null ], "organism_age_range": [] }, @@ -502,21 +502,21 @@ ], "accessions": [], "contact_names": [ - "" + null ], "contributors": [], "document_id": [ "6615efae-fca8-4dd2-a223-9cfcf30fe94d" ], - "estimated_cell_count": "", + "estimated_cell_count": null, "institutions": [ "Fake Institution" ], "laboratory": [ - "" + null ], "project_description": [ - "" + null ], "project_short_name": [ "integration/Smart-seq2/2018-10-10T02:23:36Z" @@ -525,11 +525,11 @@ "Q4_DEMO-Single cell RNA-seq of primary human glioblastomas" ], "publication_titles": [ - "" + null ], "publications": [], "supplementary_links": [ - "" + null ] }, "relations": [] @@ -541,7 +541,7 @@ "_type": "file", "content-type": "text/plain; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "e8b38826", "document_id": "0b34a80b-c818-4010-b193-e44a77036e27", @@ -550,11 +550,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.quality_distribution_metrics.txt", - "read_index": "", + "read_index": null, "sha256": "fb2c9cb1dd646e7941086b570df03fb5d6f8c539b0789a1ae1d18fa1129d0838", "size": 1490, "uuid": "2526ee96-c839-4d0e-869c-585fc9066cda", @@ -623,7 +623,7 @@ "a21dc760-a500-4236-bcff-da34a0e873d2" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "pancreas" @@ -632,10 +632,10 @@ "islet of Langerhans" ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -657,10 +657,10 @@ "specimens" ], "model_organ": [ - "" + null ], "model_organ_part": [ - "" + null ], "organ": [ "pancreas" @@ -707,7 +707,7 @@ "a21dc760-a500-4236-bcff-da34a0e873d2" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "pancreas" @@ -716,10 +716,10 @@ "islet of Langerhans" ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -741,7 +741,7 @@ "islet of Langerhans" ], "selected_cell_type": [ - "" + null ], "total_estimated_cells_redundant": 0, "total_estimated_cells": 1 @@ -759,7 +759,7 @@ "DID_scRSq06" ], "development_stage": [ - "" + null ], "diseases": [ "normal" @@ -837,13 +837,13 @@ ], "accessions": [], "contact_names": [ - "" + null ], "contributors": [], "document_id": [ "e8642221-4c2c-4fd7-b926-a68bce363c88" ], - "estimated_cell_count": "", + "estimated_cell_count": null, "institutions": [ "Farmers Trucks", "University" @@ -852,7 +852,7 @@ "John Dear" ], "project_description": [ - "" + null ], "project_short_name": [ "Single of human pancreas" @@ -877,18 +877,18 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "1d998e49", "document_id": "0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb", "drs_uri": "drs://drs-test.lan/7b07f99e-4a8a-4ad0-bd4f-db0d7a00c7bb?version=2018-11-02T11%3A33%3A44.698028Z", "file_format": "fastq.gz", - "file_source": "", + "file_source": null, "file_type": "sequence_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "SRR3562915_1.fastq.gz", "read_index": "read1", "sha256": "77337cb51b2e584b5ae1b99db6c163b988cbc5b894dda2f5d22424978c3bfc7a", @@ -946,7 +946,7 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "3d94b063", "document_id": "14d63962-7cd3-43fc-a4d6-dc8f761c9ebd", @@ -955,11 +955,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_rsem.bam", - "read_index": "", + "read_index": null, "sha256": "f25053412d65429cefc0157c0d18ae12d4bf4c4113a6af7a1820b62246c075a4", "size": 3752733, "uuid": "b1c167da-0825-4c63-9cbc-2aada1ab367c", @@ -1015,7 +1015,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "8c75cbf7", "document_id": "18791e67-0c74-43e0-be28-f0c3cd78c7a4", @@ -1024,11 +1024,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_bait_bias_detail_metrics.csv", - "read_index": "", + "read_index": null, "sha256": "e6e979ccfbdbadff06e07596b8da15223aefd278552877a318a3b6ee9c6b9e41", "size": 29544, "uuid": "35a4d374-9eb3-404b-8ef9-fa295062f969", @@ -1084,7 +1084,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "062199a3", "document_id": "1d77c6c5-24b6-4d43-ad5d-b02fba050025", @@ -1093,11 +1093,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_QCs.csv", - "read_index": "", + "read_index": null, "sha256": "796caeb3c67f0bbcca14aa76b556e9cbc1eb76c031ef1696799363325fdb4e92", "size": 7928, "uuid": "e399a1c6-f595-400b-b71b-ea95707fbf74", @@ -1153,7 +1153,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "ec79e888", "document_id": "21319798-fe9e-40a4-bcdb-a4c0780ee7bf", @@ -1162,11 +1162,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_pre_adapter_detail_metrics.csv", - "read_index": "", + "read_index": null, "sha256": "674f7fc16bb8c8e4ac0fb03779045ed0c1fdbe8ac6523dcac9341f94a7e2313d", "size": 27293, "uuid": "70367094-c901-4d72-a6b3-d2859729073a", @@ -1222,7 +1222,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "38eadd82", "document_id": "28220905-f58d-48a1-9258-53f97dd9d388", @@ -1231,11 +1231,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_gc_bias.csv", - "read_index": "", + "read_index": null, "sha256": "2d6b0ed81a78a2026739a7053ba8319d006a7b0d9e0f46adb010180d22c76b5e", "size": 8654, "uuid": "4f53c7d8-13fd-4e60-8168-df2a26652d8f", @@ -1291,7 +1291,7 @@ "_type": "file", "content-type": "text/plain; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "a45828f5", "document_id": "416e31c0-b0ec-4674-ab37-ab8506a9f219", @@ -1300,11 +1300,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.quality_by_cycle_metrics.txt", - "read_index": "", + "read_index": null, "sha256": "83ee700382b8abf0fd8f70a6d034e14e8387774aa9990cc9d9c4b89211197fca", "size": 1868, "uuid": "ee417573-2281-439a-98f9-05fcb5dd2faf", @@ -1360,7 +1360,7 @@ "_type": "file", "content-type": "text/plain; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "074766b5", "document_id": "48e42fe3-41cd-49bd-b7f4-1e3c49682131", @@ -1369,11 +1369,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.bait_bias_summary_metrics.txt", - "read_index": "", + "read_index": null, "sha256": "cb53289c95fd1030726cf93e509d4dc37461bc21b29ff8d2a4165b086dd75f50", "size": 2708, "uuid": "ac880333-cb92-4f55-971a-614bd065d8ef", @@ -1429,18 +1429,18 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "4ef74578", "document_id": "5f0cdf49-aabe-40f4-8af3-033115805bb0", "drs_uri": "drs://drs-test.lan/c005f647-b3fb-45a8-857a-8f5e6a878ccf?version=2018-10-10T02%3A38%3A11.612423Z", "file_format": "fastq.gz", - "file_source": "", + "file_source": null, "file_type": "sequence_file", "indexed": false, - "is_intermediate": "", + "is_intermediate": null, "lane_index": 1, - "matrix_cell_count": "", + "matrix_cell_count": null, "name": "R1.fastq.gz", "read_index": "read1", "sha256": "fe6d4fdfea2ff1df97500dcfe7085ac3abfb760026bff75a34c20fb97a4b2b29", @@ -1498,18 +1498,18 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "54bb9c82", "document_id": "70d1af4a-82c8-478a-8960-e9028b3616ca", "drs_uri": "drs://drs-test.lan/74897eb7-0701-4e4f-9e6b-8b9521b2816b?version=2018-11-02T11%3A33%3A44.450442Z", "file_format": "fastq.gz", - "file_source": "", + "file_source": null, "file_type": "sequence_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "SRR3562915_2.fastq.gz", "read_index": "read2", "sha256": "465a230aa127376fa641f8b8f8cad3f08fef37c8aafc67be454f0f0e4e63d68d", @@ -1567,18 +1567,18 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "69987b3e", "document_id": "74c8c730-139e-40a5-b77e-f46088fa4d95", "drs_uri": "drs://drs-test.lan/b764ce7d-3938-4451-b68c-678feebc8f2a?version=2018-10-10T02%3A38%3A11.851483Z", "file_format": "fastq.gz", - "file_source": "", + "file_source": null, "file_type": "sequence_file", "indexed": false, - "is_intermediate": "", + "is_intermediate": null, "lane_index": 1, - "matrix_cell_count": "", + "matrix_cell_count": null, "name": "R2.fastq.gz", "read_index": "read2", "sha256": "c305bee37b3c3735585e11306272b6ab085f04cd22ea8703957b4503488cfeba", @@ -1636,7 +1636,7 @@ "_type": "file", "content-type": "text/plain; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "0d086593", "document_id": "8b4ed86d-99a8-4665-8f56-5a54fac6cb12", @@ -1645,11 +1645,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.insert_size_metrics.txt", - "read_index": "", + "read_index": null, "sha256": "15c506c283d8b72c2a58fae1c8b58f525837e7e10adfa00080af72ea156fdaed", "size": 5005, "uuid": "2866dfcd-6346-4db8-ad1c-36a50dd99f44", @@ -1705,7 +1705,7 @@ "_type": "file", "content-type": "text/plain; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "132739db", "document_id": "9a827f0b-d74a-436f-a39f-f8c8c9eadfa3", @@ -1714,11 +1714,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.rna_metrics.txt", - "read_index": "", + "read_index": null, "sha256": "cb2373c6dc2048b583c624291f7d3e4690c23934805100af773aa78682543141", "size": 3218, "uuid": "3a97e493-d758-4659-84f2-1c8060161459", @@ -1774,7 +1774,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "d0ea20f5", "document_id": "9d88ec61-e189-4618-ac15-992a134749a8", @@ -1783,11 +1783,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_base_distribution_by_cycle_metrics.csv", - "read_index": "", + "read_index": null, "sha256": "c910ece6155921d0ef500074db6e8c6b7bc53839a256bbb16eaf558b79583f5c", "size": 4341, "uuid": "f7cbd75f-abb1-4703-bf11-7d55a28ae5c0", @@ -1843,7 +1843,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "24b04481", "document_id": "a3b39c62-3dd8-48f9-bcba-08a16f43a40b", @@ -1852,11 +1852,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_rsem.isoforms.results", - "read_index": "", + "read_index": null, "sha256": "abf4f12296e2e99fa153b63f920657bfa37718681111091d51c098207e7f6b41", "size": 18940597, "uuid": "55878ed2-a7bf-423f-b6ee-97969e2e66ef", @@ -1912,7 +1912,7 @@ "_type": "file", "content-type": "application/gzip; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "700bd519", "document_id": "a5acdc07-18bf-4c06-b212-2b36e52173ef", @@ -1921,11 +1921,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.bam", - "read_index": "", + "read_index": null, "sha256": "e3cd90d79f520c0806dddb1ca0c5a11fbe26ac0c0be983ba5098d6769f78294c", "size": 550597, "uuid": "51c9ad31-5888-47eb-9e0c-02f042373c4e", @@ -1981,7 +1981,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "4e75003e", "document_id": "b4767274-c12b-401f-8bc1-9c9213149442", @@ -1990,11 +1990,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/.zattrs", - "read_index": "", + "read_index": null, "sha256": "e9bf2c74bb562b91a45538bd74225edfd3a62bd8b74ab8c71b3c8f7295f3fcc6", "size": 144, "uuid": "c1c4a2bc-b5fb-4083-af64-f5dec70d7f9d", @@ -2050,7 +2050,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "444a7707", "document_id": "dfa9da1b-3f48-4c76-86a2-833ded3e2745", @@ -2059,11 +2059,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/.zgroup", - "read_index": "", + "read_index": null, "sha256": "2383746e67b4bcc2762b3f100f06c3fa2d5f149ab5a8e5da5d33521464a01959", "size": 24, "uuid": "54541cc5-9010-425b-9037-22e43948c97c", @@ -2119,7 +2119,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "444a7707", "document_id": "11d84aef-c087-4fd1-bdc9-0c3402a0db01", @@ -2128,11 +2128,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/.zgroup", - "read_index": "", + "read_index": null, "sha256": "2383746e67b4bcc2762b3f100f06c3fa2d5f149ab5a8e5da5d33521464a01959", "size": 24, "uuid": "66b8f976-6f1e-45b3-bd97-069658c3c847", @@ -2188,7 +2188,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "c6ab0701", "document_id": "345c5bd6-ac02-4b5d-9d53-e2ec7d7928aa", @@ -2197,11 +2197,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/cell_id/.zarray", - "read_index": "", + "read_index": null, "sha256": "b0ae46aff3e2446c88dbde31abfb6253fcc8b571195f536f83d0641a29890669", "size": 333, "uuid": "ac05d7fb-d6b9-4ab1-8c04-6211450dbb62", @@ -2257,7 +2257,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "cd2fd51f", "document_id": "5aa074b9-f6e6-4790-9608-b5eb37425341", @@ -2266,11 +2266,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/cell_id/0.0", - "read_index": "", + "read_index": null, "sha256": "fa77e1466fbe9d016b8c20dade29ebc19356e586d725feaf83d3efc7b007161a", "size": 153, "uuid": "0c518a52-f315-4ea2-beed-1c9d8f2d802b", @@ -2326,7 +2326,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "b89e6723", "document_id": "b9672a5e-03be-41a6-9155-b29626815e11", @@ -2335,11 +2335,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/expression/.zarray", - "read_index": "", + "read_index": null, "sha256": "31f6f311ce1934669c993d3ae909f89084d605554312bc34262340e3f37005ca", "size": 341, "uuid": "136108ab-277e-47a4-acc3-1feed8fb2f25", @@ -2395,7 +2395,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "caaefa77", "document_id": "c0da3d0b-128b-4b37-a3d6-9538ab29e9b6", @@ -2404,11 +2404,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/expression/0.0", - "read_index": "", + "read_index": null, "sha256": "625a4c8c5224432840964847bbefaf43cd535d9313d045ac5c9cb19411c86bee", "size": 3054, "uuid": "0bef5419-739c-4a2c-aedb-43754d55d51c", @@ -2464,7 +2464,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "f629ec34", "document_id": "46465f1a-a0cc-413f-aebc-505e4714c6b2", @@ -2473,11 +2473,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/gene_id/.zarray", - "read_index": "", + "read_index": null, "sha256": "5c65571866242de9ca10a299eb14d239d5c5c5f4538835b2df600bb212076083", "size": 341, "uuid": "3a5f7299-1aa1-4060-9631-212c29b4d807", @@ -2533,7 +2533,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "59d86b68", "document_id": "7ba06d4d-c134-4324-84e6-3a37e57af2ce", @@ -2542,11 +2542,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/gene_id/0.0", - "read_index": "", + "read_index": null, "sha256": "ad468829c4ad38e218b5d0c1573803aba4a5fed31b6fc755c6394d97ac0d417b", "size": 192191, "uuid": "a8f0dc39-6019-4fc7-899d-4e34a48d03e5", @@ -2602,7 +2602,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "25d193cf", "document_id": "7cd4f7cc-8712-4ffd-b91d-9cb7c6182f52", @@ -2611,11 +2611,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/qc_metric/.zarray", - "read_index": "", + "read_index": null, "sha256": "334848718d4aee17991ea4cd9f0585654e24c7967a0a11c5f259c7b8825d8227", "size": 337, "uuid": "68ba4711-1447-42ac-aa40-9c0e4cda1666", @@ -2671,7 +2671,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "17a84191", "document_id": "54604922-909e-4094-83d7-9d8ac7e7cdbd", @@ -2680,11 +2680,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/qc_metric/0.0", - "read_index": "", + "read_index": null, "sha256": "215349395555fa4af2669d6119ef0ff39ba7aa3f34cb981041024466ff462693", "size": 4054, "uuid": "27e66328-e337-4bcd-ba15-7893ecaf841f", @@ -2740,7 +2740,7 @@ "_type": "file", "content-type": "application/json; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "25d193cf", "document_id": "e0fc82a4-9bc2-4219-b763-99c865e4f9c0", @@ -2749,11 +2749,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/qc_values/.zarray", - "read_index": "", + "read_index": null, "sha256": "334848718d4aee17991ea4cd9f0585654e24c7967a0a11c5f259c7b8825d8227", "size": 337, "uuid": "2ab1a516-ef36-41b6-a78f-513361658feb", @@ -2809,7 +2809,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "bdc30523", "document_id": "5c3246df-9ee8-42fe-ab19-ddd0ddea3e15", @@ -2818,11 +2818,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0.zarr/expression_matrix/qc_values/0.0", - "read_index": "", + "read_index": null, "sha256": "af4b69064dd3f393d9da8138f0f5b5599e5fcac35daa0cc0233ec229c5135d0a", "size": 1257, "uuid": "351970aa-bc4c-405e-a274-be9e08e42e98", @@ -2878,7 +2878,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "7581baea", "document_id": "b7f69b47-cbdf-4ec9-be6e-d316827aad8e", @@ -2887,11 +2887,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_rsem.genes.results", - "read_index": "", + "read_index": null, "sha256": "60462c3b1ccde59bbff0f3b62a4f8491d07a0d7a55abbb2df093f72aaadb77b5", "size": 7629174, "uuid": "cf859616-212d-4579-bd86-03c5f8e06ec0", @@ -2947,7 +2947,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "d5d1e302", "document_id": "c059ee58-fbea-40d9-b552-acc1285138a3", @@ -2956,11 +2956,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_error_summary_metrics.csv", - "read_index": "", + "read_index": null, "sha256": "c691a24d6bd11b9b60b99efe4641e2392ff50ab518b6474b595e86610dfe9751", "size": 457, "uuid": "29466279-4d12-44dd-9f87-8429b61a9f13", @@ -3030,20 +3030,20 @@ "5275e5a0-6043-4ec9-86a1-6c1140cbeede" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "blood", "lung" ], "organ_part": [ - "" + null ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -3067,17 +3067,17 @@ "specimens" ], "model_organ": [ - "" + null ], "model_organ_part": [ - "" + null ], "organ": [ "blood", "lung" ], "organ_part": [ - "" + null ] }, "relations": [] @@ -3103,20 +3103,20 @@ "5275e5a0-6043-4ec9-86a1-6c1140cbeede" ], "has_input_biomaterial": [ - "" + null ], "organ": [ "blood", "lung" ], "organ_part": [ - "" + null ], "preservation_method": [ - "" + null ], "storage_method": [ - "" + null ] }, "relations": [] @@ -3135,7 +3135,7 @@ "lung" ], "organ_part": [ - "" + null ], "selected_cell_type": [ "peripheral blood mononuclear cell" @@ -3159,7 +3159,7 @@ "blood" ], "organ_part": [ - "" + null ], "selected_cell_type": [ "peripheral blood mononuclear cell" @@ -3198,7 +3198,7 @@ "unit": "year", "value": "56" }, - "" + null ], "organism_age_range": [ [ @@ -3247,7 +3247,7 @@ ], "accessions": [], "contact_names": [ - "" + null ], "contributors": [], "document_id": [ @@ -3258,10 +3258,10 @@ "Newcastle University" ], "laboratory": [ - "" + null ], "project_description": [ - "" + null ], "project_short_name": [ "Covid19PBMC" @@ -3274,7 +3274,7 @@ ], "publications": [], "supplementary_links": [ - "" + null ] }, "relations": [] @@ -3296,10 +3296,10 @@ "file_type": "analysis_file", "indexed": false, "is_intermediate": false, - "lane_index": "", + "lane_index": null, "matrix_cell_count": 2100, "name": "AP1_file.h5ad", - "read_index": "", + "read_index": null, "sha256": "8908aec2bf49fdc557a5841352c52c8af857707c6f9c760ff51fb6e0858a6577", "size": 143, "uuid": "2ecedea4-b90c-5025-9967-fc08f27a4dc6", @@ -3351,7 +3351,7 @@ "_type": "file", "content-type": "application/octet-stream; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "6ce5efc2", "document_id": "da510e7a-5495-40c5-964d-b5291372102c", @@ -3360,11 +3360,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_qc.bam.bai", - "read_index": "", + "read_index": null, "sha256": "d7dd835ba3e9cf56958976fc90267786233958c92d9dfcd5a6cb3c0ce95a52e2", "size": 1399032, "uuid": "7eda54d3-3522-4079-9089-60839ee4d4f9", @@ -3420,7 +3420,7 @@ "_type": "file", "content-type": "text/csv; dcp-type=data", "content_description": [ - "" + null ], "crc32c": "5191baeb", "document_id": "fd67cd38-2483-49ff-8a9b-0fe1f2607901", @@ -3429,11 +3429,11 @@ "file_source": "DCP/2 Analysis", "file_type": "analysis_file", "indexed": false, - "is_intermediate": "", - "lane_index": "", - "matrix_cell_count": "", + "is_intermediate": null, + "lane_index": null, + "matrix_cell_count": null, "name": "377f2f5a-4a45-4c62-8fb0-db9ef33f5cf0_pre_adapter_summary_metrics.csv", - "read_index": "", + "read_index": null, "sha256": "8b90565cdda2daa693e74d5b9e7652eb4bb6aae91e6993b084f1103a9bfda23b", "size": 1899, "uuid": "84985aaf-e397-45ee-8b96-37ed2cc650d4", diff --git a/test/service/data/pfb_manifest.schema.json b/test/service/data/pfb_manifest.schema.json index 802c28a62f..a84f111bc2 100644 --- a/test/service/data/pfb_manifest.schema.json +++ b/test/service/data/pfb_manifest.schema.json @@ -122,6 +122,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -132,6 +133,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -142,6 +144,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -152,6 +155,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -162,6 +166,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -172,6 +177,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -182,6 +188,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -192,6 +199,7 @@ "namespace": "samples", "type": { "items": [ + "null", "string" ], "type": "array" @@ -209,6 +217,7 @@ "namespace": "sample_cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -219,6 +228,7 @@ "namespace": "sample_cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -229,6 +239,7 @@ "namespace": "sample_cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -239,6 +250,7 @@ "namespace": "sample_cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -256,6 +268,7 @@ "namespace": "sample_organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -266,6 +279,7 @@ "namespace": "sample_organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -276,6 +290,7 @@ "namespace": "sample_organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -286,6 +301,7 @@ "namespace": "sample_organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -303,6 +319,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -313,6 +330,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -323,6 +341,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -333,6 +352,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -343,6 +363,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -353,6 +374,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -363,6 +385,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -373,6 +396,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -383,6 +407,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -393,6 +418,7 @@ "namespace": "sample_specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -410,6 +436,7 @@ "namespace": "sequencing_inputs", "type": { "items": [ + "null", "string" ], "type": "array" @@ -420,6 +447,7 @@ "namespace": "sequencing_inputs", "type": { "items": [ + "null", "string" ], "type": "array" @@ -430,6 +458,7 @@ "namespace": "sequencing_inputs", "type": { "items": [ + "null", "string" ], "type": "array" @@ -447,6 +476,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -457,6 +487,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -467,6 +498,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -477,6 +509,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -487,6 +520,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -497,6 +531,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -507,6 +542,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -517,6 +553,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -527,6 +564,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -537,6 +575,7 @@ "namespace": "specimens", "type": { "items": [ + "null", "string" ], "type": "array" @@ -554,6 +593,7 @@ "namespace": "cell_suspensions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -564,6 +604,7 @@ "namespace": "cell_suspensions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -573,7 +614,7 @@ "name": "total_estimated_cells", "namespace": "cell_suspensions", "type": [ - "string", + "null", "long" ] }, @@ -581,7 +622,7 @@ "name": "total_estimated_cells_redundant", "namespace": "cell_suspensions", "type": [ - "string", + "null", "long" ] }, @@ -590,6 +631,7 @@ "namespace": "cell_suspensions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -600,6 +642,7 @@ "namespace": "cell_suspensions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -610,6 +653,7 @@ "namespace": "cell_suspensions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -627,6 +671,7 @@ "namespace": "cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -637,6 +682,7 @@ "namespace": "cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -647,6 +693,7 @@ "namespace": "cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -657,6 +704,7 @@ "namespace": "cell_lines", "type": { "items": [ + "null", "string" ], "type": "array" @@ -674,6 +722,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -684,6 +733,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -694,6 +744,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -704,6 +755,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -714,6 +766,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -724,6 +777,7 @@ "namespace": "donors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -734,13 +788,14 @@ "namespace": "donors", "type": { "items": [ - "string", + "null", { "fields": [ { "name": "value", "namespace": "donors.organism_age", "type": [ + "null", "string" ] }, @@ -748,6 +803,7 @@ "name": "unit", "namespace": "donors.organism_age", "type": [ + "null", "string" ] } @@ -777,7 +833,7 @@ "name": "donor_count", "namespace": "donors", "type": [ - "string", + "null", "long" ] } @@ -793,6 +849,7 @@ "namespace": "organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -803,6 +860,7 @@ "namespace": "organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -813,6 +871,7 @@ "namespace": "organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -823,6 +882,7 @@ "namespace": "organoids", "type": { "items": [ + "null", "string" ], "type": "array" @@ -839,6 +899,7 @@ "name": "document_id", "namespace": "files", "type": [ + "null", "string" ] }, @@ -846,6 +907,7 @@ "name": "content-type", "namespace": "files", "type": [ + "null", "string" ] }, @@ -853,7 +915,7 @@ "name": "indexed", "namespace": "files", "type": [ - "string", + "null", "boolean" ] }, @@ -861,6 +923,7 @@ "name": "name", "namespace": "files", "type": [ + "null", "string" ] }, @@ -868,6 +931,7 @@ "name": "crc32c", "namespace": "files", "type": [ + "null", "string" ] }, @@ -875,6 +939,7 @@ "name": "sha256", "namespace": "files", "type": [ + "null", "string" ] }, @@ -882,7 +947,7 @@ "name": "size", "namespace": "files", "type": [ - "string", + "null", "long" ] }, @@ -898,6 +963,7 @@ "name": "drs_uri", "namespace": "files", "type": [ + "null", "string" ] }, @@ -905,6 +971,7 @@ "name": "version", "namespace": "files", "type": [ + "null", "string" ] }, @@ -912,6 +979,7 @@ "name": "file_type", "namespace": "files", "type": [ + "null", "string" ] }, @@ -919,6 +987,7 @@ "name": "file_format", "namespace": "files", "type": [ + "null", "string" ] }, @@ -927,6 +996,7 @@ "namespace": "files", "type": { "items": [ + "null", "string" ], "type": "array" @@ -936,7 +1006,7 @@ "name": "is_intermediate", "namespace": "files", "type": [ - "string", + "null", "boolean" ] }, @@ -944,6 +1014,7 @@ "name": "file_source", "namespace": "files", "type": [ + "null", "string" ] }, @@ -951,6 +1022,7 @@ "name": "_type", "namespace": "files", "type": [ + "null", "string" ] }, @@ -958,6 +1030,7 @@ "name": "read_index", "namespace": "files", "type": [ + "null", "string" ] }, @@ -965,7 +1038,7 @@ "name": "lane_index", "namespace": "files", "type": [ - "string", + "null", "long" ] }, @@ -973,7 +1046,7 @@ "name": "matrix_cell_count", "namespace": "files", "type": [ - "string", + "null", "long" ] } @@ -989,6 +1062,7 @@ "namespace": "analysis_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -999,6 +1073,7 @@ "namespace": "analysis_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1016,6 +1091,7 @@ "namespace": "imaging_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1033,6 +1109,7 @@ "namespace": "library_preparation_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1043,6 +1120,7 @@ "namespace": "library_preparation_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1053,6 +1131,7 @@ "namespace": "library_preparation_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1070,6 +1149,7 @@ "namespace": "sequencing_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1080,6 +1160,7 @@ "namespace": "sequencing_protocols", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1090,7 +1171,7 @@ "namespace": "sequencing_protocols", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1108,6 +1189,7 @@ "namespace": "sequencing_processes", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1125,6 +1207,7 @@ "namespace": "matrices", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1141,6 +1224,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1151,6 +1235,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1161,7 +1246,7 @@ "namespace": "matrices.file", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1172,6 +1257,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1182,6 +1268,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1192,6 +1279,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1202,7 +1290,7 @@ "namespace": "matrices.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1221,6 +1309,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1231,6 +1320,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1241,6 +1331,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1251,6 +1342,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1261,6 +1353,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1271,7 +1364,7 @@ "namespace": "matrices.file", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1282,6 +1375,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1292,6 +1386,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1302,6 +1397,7 @@ "namespace": "matrices.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1312,7 +1408,7 @@ "namespace": "matrices.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1323,7 +1419,7 @@ "namespace": "matrices.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1349,6 +1445,7 @@ "namespace": "contributed_analyses", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1365,6 +1462,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1375,6 +1473,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1385,7 +1484,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1396,6 +1495,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1406,6 +1506,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1416,6 +1517,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1426,7 +1528,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1445,6 +1547,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1455,6 +1558,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1465,6 +1569,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1475,6 +1580,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1485,6 +1591,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1495,7 +1602,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1506,6 +1613,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1516,6 +1624,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1526,6 +1635,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1536,7 +1646,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1547,7 +1657,7 @@ "namespace": "contributed_analyses.file", "type": { "items": [ - "string", + "null", "long" ], "type": "array" @@ -1573,6 +1683,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1583,6 +1694,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1593,6 +1705,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1603,6 +1716,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1613,6 +1727,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1623,6 +1738,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1633,6 +1749,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1649,6 +1766,7 @@ "namespace": "projects.contributors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1659,7 +1777,7 @@ "namespace": "projects.contributors", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1670,6 +1788,7 @@ "namespace": "projects.contributors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1680,6 +1799,7 @@ "namespace": "projects.contributors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1690,6 +1810,7 @@ "namespace": "projects.contributors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1700,6 +1821,7 @@ "namespace": "projects.contributors", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1718,6 +1840,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1734,6 +1857,7 @@ "namespace": "projects.publications", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1744,6 +1868,7 @@ "namespace": "projects.publications", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1754,7 +1879,7 @@ "namespace": "projects.publications", "type": { "items": [ - "string", + "null", "boolean" ], "type": "array" @@ -1765,6 +1890,7 @@ "namespace": "projects.publications", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1783,6 +1909,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1793,6 +1920,7 @@ "namespace": "projects", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1809,6 +1937,7 @@ "namespace": "projects.accessions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1819,6 +1948,7 @@ "namespace": "projects.accessions", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1836,7 +1966,7 @@ "name": "estimated_cell_count", "namespace": "projects", "type": [ - "string", + "null", "long" ] } @@ -1852,6 +1982,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1862,6 +1993,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1872,6 +2004,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1882,6 +2015,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1892,6 +2026,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1902,6 +2037,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" @@ -1912,6 +2048,7 @@ "namespace": "dates", "type": { "items": [ + "null", "string" ], "type": "array" diff --git a/test/service/test_pfb.py b/test/service/test_pfb.py index 678549a9cf..c292428708 100644 --- a/test/service/test_pfb.py +++ b/test/service/test_pfb.py @@ -23,6 +23,7 @@ class TestPFB(AzulUnitTestCase): def test_pfb_schema(self): + self.maxDiff = None field_types = FileTransformer.field_types() schema = avro_pfb.pfb_schema_from_field_types(field_types) fastavro.parse_schema(schema)