diff --git a/scripts/can_bundle.py b/scripts/can_bundle.py index cb3eec15b..a2bdc7e22 100644 --- a/scripts/can_bundle.py +++ b/scripts/can_bundle.py @@ -38,6 +38,9 @@ from azul.plugins import ( RepositoryPlugin, ) +from azul.plugins.anvil import ( + AnvilBundle, +) from azul.types import ( AnyJSON, AnyMutableJSON, @@ -117,10 +120,12 @@ def save_bundle(bundle: Bundle, output_dir: str) -> None: def redact_bundle(bundle: Bundle, key: bytes) -> None: - for name in bundle.metadata_files.keys(): - entity_type = name.split('_')[0] - if entity_type in redacted_entity_types: - bundle.metadata_files[name] = redact_json(bundle.metadata_files[name], key) + if isinstance(bundle, AnvilBundle): + for entity_ref, entity_metadata in bundle.entities.items(): + if entity_ref.entity_type in redacted_entity_types: + bundle.entities[entity_ref] = redact_json(entity_metadata, key) + else: + raise RuntimeError('HCA bundles do not support redaction', type(bundle)) def redact_json(o: AnyJSON, key: bytes) -> AnyMutableJSON: diff --git a/src/azul/indexer/__init__.py b/src/azul/indexer/__init__.py index 1b62bd8ad..6916645a8 100644 --- a/src/azul/indexer/__init__.py +++ b/src/azul/indexer/__init__.py @@ -33,7 +33,6 @@ AnyJSON, JSON, MutableJSON, - MutableJSONs, SupportsLessThan, get_generic_type_params, ) @@ -399,23 +398,6 @@ def to_json(self) -> SourcedBundleFQIDJSON: @attr.s(auto_attribs=True, kw_only=True) class Bundle(Generic[BUNDLE_FQID], metaclass=ABCMeta): fqid: BUNDLE_FQID - manifest: MutableJSONs - """ - Each item of the `manifest` attribute's value has this shape: - { - 'content-type': 'application/json; dcp-type="metadata/biomaterial"', - 'crc32c': 'fd239631', - 'indexed': True, - 'name': 'cell_suspension_0.json', - 's3_etag': 'aa31c093cc816edb1f3a42e577872ec6', - 'sha1': 'f413a9a7923dee616309e4f40752859195798a5d', - 'sha256': 'ea4c9ed9e53a3aa2ca4b7dffcacb6bbe9108a460e8e15d2b3d5e8e5261fb043e', - 'size': 1366, - 'uuid': '0136ebb4-1317-42a0-8826-502fae25c29f', - 'version': 
'2019-05-16T162155.020000Z' - } - """ - metadata_files: MutableJSON @property def uuid(self) -> BundleUUID: @@ -453,26 +435,19 @@ def reject_joiner(self): """ Raise a requirement error if the given string is found in the bundle """ - self._reject_joiner(self.manifest) - self._reject_joiner(self.metadata_files) + self._reject_joiner(self.to_json()) @classmethod def extension(cls) -> str: return '' - def to_json(self) -> MutableJSON: - return { - 'manifest': self.manifest, - 'metadata': self.metadata_files - } - @classmethod def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'Bundle': - manifest = json_['manifest'] - metadata = json_['metadata'] - assert isinstance(manifest, list), manifest - assert isinstance(metadata, dict), metadata - return cls(fqid=fqid, manifest=manifest, metadata_files=metadata) + raise NotImplementedError + + @abstractmethod + def to_json(self) -> MutableJSON: + raise NotImplementedError BUNDLE = TypeVar('BUNDLE', bound=Bundle) diff --git a/src/azul/plugins/__init__.py b/src/azul/plugins/__init__.py index ead6626ed..3a9f9cbba 100644 --- a/src/azul/plugins/__init__.py +++ b/src/azul/plugins/__init__.py @@ -499,6 +499,11 @@ def _bundle_fqid_cls(self) -> Type[BUNDLE_FQID]: bundle_cls, spec_cls, ref_cls, fqid_cls = self._generic_params return fqid_cls + @property + def _bundle_cls(self) -> Type[BUNDLE]: + bundle_cls, spec_cls, ref_cls, fqid_cls = self._generic_params + return bundle_cls + def resolve_source(self, spec: str) -> SOURCE_REF: """ Return an instance of :class:`SourceRef` for the repository source diff --git a/src/azul/plugins/anvil.py b/src/azul/plugins/anvil.py new file mode 100644 index 000000000..ee63250cb --- /dev/null +++ b/src/azul/plugins/anvil.py @@ -0,0 +1,104 @@ +from abc import ( + ABC, +) +from typing import ( + AbstractSet, + Generic, + Iterable, + Optional, + TypeVar, + Union, +) + +import attr +from more_itertools import ( + one, +) + +from azul.indexer import ( + BUNDLE_FQID, + Bundle, +) +from azul.indexer.document 
import ( + EntityReference, + EntityType, +) +from azul.types import ( + JSON, + MutableJSON, +) + +# AnVIL snapshots do not use UUIDs for primary/foreign keys. +# This type alias helps us distinguish these keys from the document UUIDs, +# which are drawn from the `datarepo_row_id` column. +# Note that entities from different tables may have the same key, so +# `KeyReference` should be used when mixing keys from different entity types. +Key = str + + +@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) +class KeyReference: + key: Key + entity_type: EntityType + + +ENTITY_REF = TypeVar(name='ENTITY_REF', bound=Union[EntityReference, KeyReference]) + + +@attr.s(auto_attribs=True, frozen=True, kw_only=True, order=False) +class Link(Generic[ENTITY_REF]): + inputs: AbstractSet[ENTITY_REF] = attr.ib(factory=frozenset, converter=frozenset) + activity: Optional[ENTITY_REF] = attr.ib(default=None) + outputs: AbstractSet[ENTITY_REF] = attr.ib(factory=frozenset, converter=frozenset) + + @property + def all_entities(self) -> AbstractSet[ENTITY_REF]: + return self.inputs | self.outputs | (set() if self.activity is None else {self.activity}) + + @classmethod + def from_json(cls, link: JSON) -> 'Link': + return cls(inputs=set(map(EntityReference.parse, link['inputs'])), + activity=None if link['activity'] is None else EntityReference.parse(link['activity']), + outputs=set(map(EntityReference.parse, link['outputs']))) + + def to_json(self) -> MutableJSON: + return { + 'inputs': sorted(map(str, self.inputs)), + 'activity': None if self.activity is None else str(self.activity), + 'outputs': sorted(map(str, self.outputs)) + } + + @classmethod + def merge(cls, links: Iterable['Link']) -> 'Link': + return cls(inputs=frozenset.union(*[link.inputs for link in links]), + activity=one({link.activity for link in links}), + outputs=frozenset.union(*[link.outputs for link in links])) + + def __lt__(self, other: 'Link') -> bool: + return min(self.inputs) < min(other.inputs) + + 
+@attr.s(auto_attribs=True, kw_only=True) +class AnvilBundle(Bundle[BUNDLE_FQID], ABC): + entities: dict[EntityReference, MutableJSON] = attr.ib(factory=dict) + links: set[Link[EntityReference]] = attr.ib(factory=set) + + def to_json(self) -> MutableJSON: + return { + 'entities': { + str(entity_ref): entity + for entity_ref, entity in sorted(self.entities.items()) + }, + 'links': [link.to_json() for link in sorted(self.links)] + } + + @classmethod + def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'AnvilBundle': + return cls( + fqid=fqid, + entities={ + EntityReference.parse(entity_ref): entity + for entity_ref, entity in json_['entities'].items() + }, + links=set(map(Link.from_json, json_['links'])) + ) diff --git a/src/azul/plugins/hca.py b/src/azul/plugins/hca.py new file mode 100644 index 000000000..365bba6f5 --- /dev/null +++ b/src/azul/plugins/hca.py @@ -0,0 +1,53 @@ +from abc import ( + ABC, +) +import logging + +import attr + +from azul.indexer import ( + BUNDLE_FQID, + Bundle, +) +from azul.types import ( + JSON, + MutableJSON, + MutableJSONs, +) + +log = logging.getLogger(__name__) + + +@attr.s(auto_attribs=True, kw_only=True) +class HCABundle(Bundle[BUNDLE_FQID], ABC): + manifest: MutableJSONs + """ + Each item of the `manifest` attribute's value has this shape: + { + 'content-type': 'application/json; dcp-type="metadata/biomaterial"', + 'crc32c': 'fd239631', + 'indexed': True, + 'name': 'cell_suspension_0.json', + 's3_etag': 'aa31c093cc816edb1f3a42e577872ec6', + 'sha1': 'f413a9a7923dee616309e4f40752859195798a5d', + 'sha256': 'ea4c9ed9e53a3aa2ca4b7dffcacb6bbe9108a460e8e15d2b3d5e8e5261fb043e', + 'size': 1366, + 'uuid': '0136ebb4-1317-42a0-8826-502fae25c29f', + 'version': '2019-05-16T162155.020000Z' + } + """ + metadata_files: MutableJSON + + def to_json(self) -> MutableJSON: + return { + 'manifest': self.manifest, + 'metadata': self.metadata_files + } + + @classmethod + def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'Bundle': + manifest = 
json_['manifest'] + metadata = json_['metadata'] + assert isinstance(manifest, list), manifest + assert isinstance(metadata, dict), metadata + return cls(fqid=fqid, manifest=manifest, metadata_files=metadata) diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index 53ee9f391..498f8dc02 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -6,15 +6,15 @@ Type, ) -from azul.indexer import ( - Bundle, -) from azul.plugins import ( DocumentSlice, ManifestConfig, MetadataPlugin, Sorting, ) +from azul.plugins.anvil import ( + AnvilBundle, +) from azul.plugins.metadata.anvil.indexer.transform import ( ActivityTransformer, BaseTransformer, @@ -42,7 +42,7 @@ ) -class Plugin(MetadataPlugin[Bundle]): +class Plugin(MetadataPlugin[AnvilBundle]): @classmethod def atlas(cls) -> str: @@ -71,7 +71,7 @@ def transformer_types(self) -> Iterable[Type[BaseTransformer]]: FileTransformer, ) - def transformers(self, bundle: Bundle, *, delete: bool) -> Iterable[BaseTransformer]: + def transformers(self, bundle: AnvilBundle, *, delete: bool) -> Iterable[BaseTransformer]: return [ transformer_cls(bundle=bundle, deleted=delete) for transformer_cls in self.transformer_types() diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index 67891d250..447e03dbc 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -13,10 +13,13 @@ from itertools import ( chain, ) +from operator import ( + attrgetter, +) from typing import ( Callable, + Collection, Iterable, - Mapping, Optional, ) from uuid import ( @@ -29,7 +32,6 @@ ) from azul.indexer import ( - Bundle, BundlePartition, ) from azul.indexer.aggregate import ( @@ -38,7 +40,6 @@ from azul.indexer.document import ( Contribution, ContributionCoordinates, - EntityID, EntityReference, EntityType, FieldTypes, @@ 
-52,6 +53,10 @@ from azul.indexer.transform import ( Transformer, ) +from azul.plugins.anvil import ( + AnvilBundle, + Link, +) from azul.plugins.metadata.anvil.indexer.aggregate import ( ActivityAggregator, BiosampleAggregator, @@ -60,31 +65,31 @@ DonorAggregator, FileAggregator, ) -from azul.plugins.repository.tdr_hca import ( - EntitiesByType, -) from azul.strings import ( pluralize, ) from azul.types import ( - JSON, - JSONs, MutableJSON, MutableJSONs, ) +EntityRefsByType = dict[EntityType, set[EntityReference]] + @attr.s(auto_attribs=True, kw_only=True, frozen=True) class LinkedEntities: origin: EntityReference - ancestors: EntitiesByType - descendants: EntitiesByType + ancestors: EntityRefsByType + descendants: EntityRefsByType - def __getitem__(self, item: EntityType) -> set[EntityID]: + def __getitem__(self, item: EntityType) -> set[EntityReference]: return self.ancestors[item] | self.descendants[item] @classmethod - def from_links(cls, origin: EntityReference, links: JSONs) -> 'LinkedEntities': + def from_links(cls, + origin: EntityReference, + links: Collection[Link[EntityReference]] + ) -> 'LinkedEntities': return cls(origin=origin, ancestors=cls._search(origin, links, from_='outputs', to='inputs'), descendants=cls._search(origin, links, from_='inputs', to='outputs')) @@ -92,30 +97,29 @@ def from_links(cls, origin: EntityReference, links: JSONs) -> 'LinkedEntities': @classmethod def _search(cls, entity_ref: EntityReference, - links: JSONs, - entities: Optional[EntitiesByType] = None, + links: Collection[Link[EntityReference]], + entities: Optional[EntityRefsByType] = None, *, from_: str, to: str - ) -> EntitiesByType: + ) -> EntityRefsByType: entities = defaultdict(set) if entities is None else entities if entity_ref.entity_type.endswith('activity'): - follow = [one(link for link in links if str(entity_ref) == link['activity'])] + follow = [one(link for link in links if entity_ref == link.activity)] else: - follow = [link for link in links if 
str(entity_ref) in link[from_]] + follow = [link for link in links if entity_ref in getattr(link, from_)] for link in follow: - for relative in [link['activity'], *link[to]]: + for relative in [link.activity, *getattr(link, to)]: if relative is not None: - relative = EntityReference.parse(relative) - if relative != entity_ref and relative.entity_id not in entities[relative.entity_type]: - entities[relative.entity_type].add(relative.entity_id) + if relative != entity_ref and relative not in entities[relative.entity_type]: + entities[relative.entity_type].add(relative) cls._search(relative, links, entities, from_=from_, to=to) return entities @attr.s(frozen=True, kw_only=True, auto_attribs=True) class BaseTransformer(Transformer, metaclass=ABCMeta): - bundle: Bundle + bundle: AnvilBundle deleted: bool @classmethod @@ -147,14 +151,17 @@ def get_aggregator(cls, entity_type) -> EntityAggregator: assert False, entity_type def estimate(self, partition: BundlePartition) -> int: - return sum(map(partial(self._contains, partition), self.bundle.manifest)) + return sum(map(partial(self._contains, partition), self.bundle.entities)) def transform(self, partition: BundlePartition) -> Iterable[Contribution]: - return map(self._transform, - filter(partial(self._contains, partition), self.bundle.manifest)) + return ( + self._transform(entity) + for entity in self.bundle.entities + if self._contains(partition, entity) + ) @abstractmethod - def _transform(self, manifest_entry: JSON) -> Contribution: + def _transform(self, entity: EntityReference) -> Contribution: raise NotImplementedError def _pluralize(self, entity_type: str) -> str: @@ -163,34 +170,21 @@ def _pluralize(self, entity_type: str) -> str: else: return pluralize(entity_type) - def _contains(self, partition: BundlePartition, manifest_entry: JSON) -> bool: + def _contains(self, partition: BundlePartition, entity: EntityReference) -> bool: return ( - self._pluralize(self._entity_type(manifest_entry)).endswith(self.entity_type()) - and partition.contains(UUID(manifest_entry['uuid'])) + 
pluralize(entity.entity_type).endswith(self.entity_type()) + and partition.contains(UUID(entity.entity_id)) ) - def _entity_type(self, manifest_entry: JSON) -> EntityType: - return manifest_entry['name'].split('_')[0] - - @cached_property - def _entries_by_entity_id(self) -> Mapping[EntityID, JSON]: - return { - manifest_entry['uuid']: manifest_entry - for manifest_entry in self.bundle.manifest - } - @cached_property - def _entities_by_type(self) -> EntitiesByType: + def _entities_by_type(self) -> dict[EntityType, set[EntityReference]]: entries = defaultdict(set) - for e in self.bundle.manifest: - entries[self._entity_type(e)].add(e['uuid']) + for e in self.bundle.entities: + entries[e.entity_type].add(e) return entries - def _linked_entities(self, manifest_entry: JSON) -> LinkedEntities: - entity_ref = EntityReference(entity_type=self._entity_type(manifest_entry), - entity_id=manifest_entry['uuid']) - links = self.bundle.metadata_files['links'] - return LinkedEntities.from_links(entity_ref, links) + def _linked_entities(self, entity: EntityReference) -> LinkedEntities: + return LinkedEntities.from_links(entity, self.bundle.links) @classmethod def _entity_types(cls) -> FieldTypes: @@ -294,8 +288,8 @@ def _aggregate_file_types(cls) -> FieldTypes: 'count': pass_thru_int # Added by FileAggregator, ever null } - def _range(self, manifest_entry: JSON, *field_prefixes: str) -> MutableJSON: - metadata = self.bundle.metadata_files[manifest_entry['name']] + def _range(self, entity: EntityReference, *field_prefixes: str) -> MutableJSON: + metadata = self.bundle.entities[entity] def get_bound(field_name: str) -> Optional[float]: val = metadata[field_name] @@ -310,11 +304,16 @@ def get_bound(field_name: str) -> Optional[float]: } def _contribution(self, + entity: EntityReference, contents: MutableJSON, - entity_id: EntityID ) -> Contribution: - entity = EntityReference(entity_type=self.entity_type(), - entity_id=entity_id) + # The entity type is used to determine the index 
name. + # All activities go into the same index, regardless of their polymorphic type. + # Index names use plural forms. + entity_type = pluralize('activity' + if entity.entity_type.endswith('activity') else + entity.entity_type) + entity = attr.evolve(entity, entity_type=entity_type) coordinates = ContributionCoordinates(entity=entity, bundle=self.bundle.fqid.upcast(), deleted=self.deleted) @@ -324,14 +323,13 @@ def _contribution(self, contents=contents) def _entity(self, - manifest_entry: JSON, + entity: EntityReference, field_types: FieldTypes, **additional_fields ) -> MutableJSON: - metadata = self.bundle.metadata_files[manifest_entry['name']] + metadata = self.bundle.entities[entity] field_values = ChainMap(metadata, - {'document_id': manifest_entry['uuid']}, - manifest_entry, + {'document_id': entity.entity_id}, additional_fields) return { field: field_values[field] @@ -339,22 +337,20 @@ def _entity(self, } def _entities(self, - factory: Callable[[JSON], MutableJSON], - entity_ids: Iterable[EntityID], + factory: Callable[[EntityReference], MutableJSON], + entities: Iterable[EntityReference], ) -> MutableJSONs: - entities = [] - for entity_id in sorted(entity_ids): - manifest_entry = self._entries_by_entity_id[entity_id] - entities.append(factory(manifest_entry)) - return entities + return [ + factory(entity) + for entity in sorted(entities, key=attrgetter('entity_id')) + ] - def _activity(self, manifest_entry: JSON) -> MutableJSON: - activity_table = self._entity_type(manifest_entry) - metadata = self.bundle.metadata_files[manifest_entry['name']] + def _activity(self, activity: EntityReference) -> MutableJSON: + metadata = self.bundle.entities[activity] field_types = self._activity_types() common_fields = { - 'activity_table': activity_table, - 'activity_id': metadata[f'{activity_table}_id'] + 'activity_table': activity.entity_type, + 'activity_id': metadata[f'{activity.entity_type}_id'] } # Activities are unique in that they may not contain every field 
defined # in their field types due to polymorphism, so we need to pad the field @@ -364,37 +360,39 @@ def _activity(self, manifest_entry: JSON) -> MutableJSON: for field_name, field_type in field_types.items() if field_name not in common_fields } - activity = self._entity(manifest_entry, + activity = self._entity(activity, self._activity_types(), **common_fields, **union_fields) return activity - def _biosample(self, manifest_entry: JSON) -> MutableJSON: - return self._entity(manifest_entry, + def _biosample(self, biosample: EntityReference) -> MutableJSON: + return self._entity(biosample, self._biosample_types(), - **self._range(manifest_entry, 'donor_age_at_collection')) + **self._range(biosample, 'donor_age_at_collection')) - def _dataset(self, manifest_entry: JSON) -> MutableJSON: - return self._entity(manifest_entry, self._dataset_types()) + def _dataset(self, dataset: EntityReference) -> MutableJSON: + return self._entity(dataset, self._dataset_types()) - def _diagnosis(self, manifest_entry: JSON) -> MutableJSON: - return self._entity(manifest_entry, + def _diagnosis(self, diagnosis: EntityReference) -> MutableJSON: + return self._entity(diagnosis, self._diagnosis_types(), - **self._range(manifest_entry, 'diagnosis_age', 'onset_age')) + **self._range(diagnosis, 'diagnosis_age', 'onset_age')) - def _donor(self, manifest_entry: JSON) -> MutableJSON: - return self._entity(manifest_entry, self._donor_types()) + def _donor(self, donor: EntityReference) -> MutableJSON: + return self._entity(donor, self._donor_types()) - def _file(self, manifest_entry: JSON) -> MutableJSON: - metadata = self.bundle.metadata_files[manifest_entry['name']] - return self._entity(manifest_entry, + def _file(self, file: EntityReference) -> MutableJSON: + metadata = self.bundle.entities[file] + return self._entity(file, self._file_types(), - size=metadata['file_size']) + size=metadata['file_size'], + name=metadata['file_name'], + uuid=file.entity_id) def _only_dataset(self) -> MutableJSON: 
- return self._dataset(self._entries_by_entity_id[one(self._entities_by_type['dataset'])]) + return self._dataset(one(self._entities_by_type['dataset'])) _activity_polymorphic_types = { 'activity', @@ -411,17 +409,17 @@ class ActivityTransformer(BaseTransformer): def entity_type(cls) -> str: return 'activities' - def _transform(self, manifest_entry: JSON) -> Contribution: - linked = self._linked_entities(manifest_entry) + def _transform(self, entity: EntityReference) -> Contribution: + linked = self._linked_entities(entity) contents = dict( - activities=[self._activity(manifest_entry)], + activities=[self._activity(entity)], biosamples=self._entities(self._biosample, linked['biosample']), datasets=[self._only_dataset()], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=self._entities(self._donor, linked['donor']), files=self._entities(self._file, linked['file']), ) - return self._contribution(contents, manifest_entry['uuid']) + return self._contribution(entity, contents) class BiosampleTransformer(BaseTransformer): @@ -430,20 +428,20 @@ class BiosampleTransformer(BaseTransformer): def entity_type(cls) -> str: return 'biosamples' - def _transform(self, manifest_entry: JSON) -> Contribution: - linked = self._linked_entities(manifest_entry) + def _transform(self, entity: EntityReference) -> Contribution: + linked = self._linked_entities(entity) contents = dict( activities=self._entities(self._activity, chain.from_iterable( linked[activity_type] for activity_type in self._activity_polymorphic_types )), - biosamples=[self._biosample(manifest_entry)], + biosamples=[self._biosample(entity)], datasets=[self._only_dataset()], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=self._entities(self._donor, linked['donor']), files=self._entities(self._file, linked['file']), ) - return self._contribution(contents, manifest_entry['uuid']) + return self._contribution(entity, contents) class DatasetTransformer(BaseTransformer): @@ -452,19 
+450,19 @@ class DatasetTransformer(BaseTransformer): def entity_type(cls) -> str: return 'datasets' - def _transform(self, manifest_entry: JSON) -> Contribution: + def _transform(self, entity: EntityReference) -> Contribution: contents = dict( activities=self._entities(self._activity, chain.from_iterable( self._entities_by_type[activity_type] for activity_type in self._activity_polymorphic_types )), biosamples=self._entities(self._biosample, self._entities_by_type['biosample']), - datasets=[self._dataset(manifest_entry)], + datasets=[self._dataset(entity)], diagnoses=self._entities(self._diagnosis, self._entities_by_type['diagnosis']), donors=self._entities(self._donor, self._entities_by_type['donor']), files=self._entities(self._file, self._entities_by_type['file']), ) - return self._contribution(contents, manifest_entry['uuid']) + return self._contribution(entity, contents) class DonorTransformer(BaseTransformer): @@ -473,8 +471,8 @@ class DonorTransformer(BaseTransformer): def entity_type(cls) -> str: return 'donors' - def _transform(self, manifest_entry: JSON) -> Contribution: - linked = self._linked_entities(manifest_entry) + def _transform(self, entity: EntityReference) -> Contribution: + linked = self._linked_entities(entity) contents = dict( activities=self._entities(self._activity, chain.from_iterable( linked[activity_type] @@ -483,10 +481,10 @@ def _transform(self, manifest_entry: JSON) -> Contribution: biosamples=self._entities(self._biosample, linked['biosample']), datasets=[self._only_dataset()], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), - donors=[self._donor(manifest_entry)], + donors=[self._donor(entity)], files=self._entities(self._file, linked['file']), ) - return self._contribution(contents, manifest_entry['uuid']) + return self._contribution(entity, contents) class FileTransformer(BaseTransformer): @@ -495,8 +493,8 @@ class FileTransformer(BaseTransformer): def entity_type(cls) -> str: return 'files' - def _transform(self, 
manifest_entry: JSON) -> Contribution: - linked = self._linked_entities(manifest_entry) + def _transform(self, entity: EntityReference) -> Contribution: + linked = self._linked_entities(entity) contents = dict( activities=self._entities(self._activity, chain.from_iterable( linked[activity_type] @@ -506,6 +504,6 @@ def _transform(self, manifest_entry: JSON) -> Contribution: datasets=[self._only_dataset()], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=self._entities(self._donor, linked['donor']), - files=[self._file(manifest_entry)], + files=[self._file(entity)], ) - return self._contribution(contents, manifest_entry['uuid']) + return self._contribution(entity, contents) diff --git a/src/azul/plugins/metadata/hca/__init__.py b/src/azul/plugins/metadata/hca/__init__.py index 03961fdcd..797cdd278 100644 --- a/src/azul/plugins/metadata/hca/__init__.py +++ b/src/azul/plugins/metadata/hca/__init__.py @@ -8,9 +8,6 @@ Type, ) -from azul.indexer import ( - Bundle, -) from azul.indexer.document import ( Aggregate, ) @@ -20,6 +17,9 @@ MetadataPlugin, Sorting, ) +from azul.plugins.hca import ( + HCABundle, +) from azul.plugins.metadata.hca.indexer.aggregate import ( HCAAggregate, ) @@ -53,7 +53,7 @@ ) -class Plugin(MetadataPlugin[Bundle]): +class Plugin(MetadataPlugin[HCABundle]): @classmethod def atlas(cls) -> str: @@ -68,7 +68,7 @@ def transformer_types(self) -> Iterable[Type[BaseTransformer]]: BundleTransformer ) - def transformers(self, bundle: Bundle, *, delete: bool) -> Iterable[BaseTransformer]: + def transformers(self, bundle: HCABundle, *, delete: bool) -> Iterable[BaseTransformer]: api_bundle = api.Bundle(uuid=bundle.uuid, version=bundle.version, manifest=bundle.manifest, diff --git a/src/azul/plugins/metadata/hca/indexer/transform.py b/src/azul/plugins/metadata/hca/indexer/transform.py index 9eaf5b679..029595d73 100644 --- a/src/azul/plugins/metadata/hca/indexer/transform.py +++ b/src/azul/plugins/metadata/hca/indexer/transform.py @@ -54,7 
+54,6 @@ auto, ) from azul.indexer import ( - Bundle, BundlePartition, ) from azul.indexer.aggregate import ( @@ -87,6 +86,9 @@ from azul.openapi import ( schema, ) +from azul.plugins.hca import ( + HCABundle, +) from azul.plugins.metadata.hca.indexer.aggregate import ( CellLineAggregator, CellSuspensionAggregator, @@ -449,7 +451,7 @@ class DatedEntity(Entity, Protocol): @attr.s(frozen=True, kw_only=True, auto_attribs=True) class BaseTransformer(Transformer, metaclass=ABCMeta): - bundle: Bundle + bundle: HCABundle api_bundle: api.Bundle deleted: bool @@ -462,7 +464,7 @@ class BaseTransformer(Transformer, metaclass=ABCMeta): # noinspection PyDataclass,PyUnusedLocal def __init__(self, *, - bundle: Bundle, + bundle: HCABundle, api_bundle: api.Bundle, deleted: bool): ... diff --git a/src/azul/plugins/repository/canned/__init__.py b/src/azul/plugins/repository/canned/__init__.py index e7dcb35d9..1d91d4a63 100644 --- a/src/azul/plugins/repository/canned/__init__.py +++ b/src/azul/plugins/repository/canned/__init__.py @@ -39,7 +39,6 @@ Authentication, ) from azul.indexer import ( - Bundle, SimpleSourceSpec, SourceRef, SourcedBundleFQID, @@ -48,6 +47,9 @@ RepositoryFileDownload, RepositoryPlugin, ) +from azul.plugins.hca import ( + HCABundle, +) from azul.time import ( parse_dcp2_version, ) @@ -75,7 +77,7 @@ class CannedBundleFQID(SourcedBundleFQID[CannedSourceRef]): pass -class CannedBundle(Bundle[CannedBundleFQID]): +class CannedBundle(HCABundle[CannedBundleFQID]): def drs_path(self, manifest_entry: JSON) -> Optional[str]: return None diff --git a/src/azul/plugins/repository/dss/__init__.py b/src/azul/plugins/repository/dss/__init__.py index 59eb95ebc..eb6a8ee6f 100644 --- a/src/azul/plugins/repository/dss/__init__.py +++ b/src/azul/plugins/repository/dss/__init__.py @@ -39,7 +39,6 @@ aws, ) from azul.indexer import ( - Bundle, SimpleSourceSpec, SourceRef, SourcedBundleFQID, @@ -48,6 +47,9 @@ RepositoryFileDownload, RepositoryPlugin, ) +from azul.plugins.hca import ( + 
HCABundle, +) from azul.time import ( parse_dcp2_version, ) @@ -80,7 +82,7 @@ class DSSBundleFQID(SourcedBundleFQID[DSSSourceRef]): pass -class DSSBundle(Bundle[DSSBundleFQID]): +class DSSBundle(HCABundle[DSSBundleFQID]): @classmethod def extension(cls) -> str: diff --git a/src/azul/plugins/repository/tdr.py b/src/azul/plugins/repository/tdr.py index bbddcb9a7..d2db6ec17 100644 --- a/src/azul/plugins/repository/tdr.py +++ b/src/azul/plugins/repository/tdr.py @@ -1,4 +1,5 @@ from abc import ( + ABC, abstractmethod, ) from collections.abc import ( @@ -72,7 +73,7 @@ class TDRBundleFQID(SourcedBundleFQID[TDRSourceRef]): pass -class TDRBundle(Bundle[TDRBundleFQID]): +class TDRBundle(Bundle[TDRBundleFQID], ABC): @classmethod def extension(cls): diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 93c38b6f4..c5294a2a0 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -12,10 +12,8 @@ from typing import ( AbstractSet, Callable, - Iterable, Mapping, Optional, - Union, ) import attr @@ -39,6 +37,12 @@ EntityReference, EntityType, ) +from azul.plugins.anvil import ( + AnvilBundle, + Key, + KeyReference, + Link, +) from azul.plugins.repository.tdr import ( TDRBundle, TDRBundleFQID, @@ -49,8 +53,6 @@ TDRSourceSpec, ) from azul.types import ( - AnyMutableJSON, - JSON, MutableJSON, MutableJSONs, ) @@ -60,59 +62,11 @@ log = logging.getLogger(__name__) -# AnVIL snapshots do not use UUIDs for primary/foreign keys. -# This type alias helps us distinguish these keys from the document UUIDs, -# which are drawn from the `datarepo_row_id` column. -# Note that entities from different tables may have the same key, so -# `KeyReference` should be used when mixing keys from different entity types. 
-Key = str - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class KeyReference: - key: Key - entity_type: EntityType - - Keys = AbstractSet[KeyReference] MutableKeys = set[KeyReference] KeysByType = dict[EntityType, AbstractSet[Key]] MutableKeysByType = dict[EntityType, set[Key]] - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class Link: - inputs: Keys - activity: Optional[KeyReference] - outputs: Keys - - @property - def all_entities(self) -> Keys: - return self.inputs | self.outputs | (set() if self.activity is None else {self.activity}) - - @classmethod - def create(cls, - *, - inputs: Union[KeyReference, Iterable[KeyReference]], - outputs: Union[KeyReference, Iterable[KeyReference]], - activity: Optional[KeyReference] = None - ) -> 'Link': - if isinstance(inputs, KeyReference): - inputs = (inputs,) - if isinstance(outputs, KeyReference): - outputs = (outputs,) - return cls(inputs=frozenset(inputs), - outputs=frozenset(outputs), - activity=activity) - - @classmethod - def merge(cls, links: Iterable['Link']) -> 'Link': - return cls(inputs=frozenset.union(*[link.inputs for link in links]), - activity=one({link.activity for link in links}), - outputs=frozenset.union(*[link.outputs for link in links])) - - -Links = set[Link] +KeyLinks = set[Link[KeyReference]] class BundleEntityType(Enum): @@ -155,69 +109,42 @@ def to_json(self) -> SourcedBundleFQIDJSON: entity_type=self.entity_type.value) -class TDRAnvilBundle(TDRBundle): +class TDRAnvilBundle(AnvilBundle[AnvilBundleFQID], TDRBundle): + + @classmethod + def extension(cls) -> str: + return 'tdr.anvil' def add_entity(self, entity: EntityReference, version: str, row: MutableJSON ) -> None: - pk_column = entity.entity_type + '_id' - self._add_entity( - manifest_entry={ - 'uuid': entity.entity_id, - 'version': version, - 'name': f'{entity.entity_type}_{row[pk_column]}', - 'indexed': True, - 'crc32': '', - 'sha256': '', - **( - {'drs_path': 
self._parse_drs_uri(row.get('file_ref'))} - if entity.entity_type == 'file' else {} - ) - }, - metadata=row - ) + assert entity not in self.entities, entity + metadata = dict(row, + version=version) + if entity.entity_type == 'file': + metadata.update(drs_path=self._parse_drs_uri(row.get('file_ref')), + sha256='', + crc32='') + self.entities[entity] = metadata def add_links(self, - links: Links, + links: KeyLinks, entities_by_key: Mapping[KeyReference, EntityReference]) -> None: - def link_sort_key(link: JSON): - return link['activity'] or '', link['inputs'], link['outputs'] + def key_ref_to_entity_ref(key_ref: KeyReference) -> EntityReference: + return entities_by_key[key_ref] - def key_ref_to_entity_ref(key_ref: KeyReference) -> str: - return str(entities_by_key[key_ref]) - - def optional_key_ref_to_entity_ref(key_ref: Optional[KeyReference]) -> str: + def optional_key_ref_to_entity_ref(key_ref: Optional[KeyReference]) -> Optional[EntityReference]: return None if key_ref is None else key_ref_to_entity_ref(key_ref) - self._add_entity( - manifest_entry={ - 'uuid': self.fqid.uuid, - 'version': self.fqid.version, - 'name': 'links', - 'indexed': True - }, - metadata=sorted(( - { - 'inputs': sorted(map(key_ref_to_entity_ref, link.inputs)), - 'activity': optional_key_ref_to_entity_ref(link.activity), - 'outputs': sorted(map(key_ref_to_entity_ref, link.outputs)) - } - for link in links - ), key=link_sort_key) + self.links.update( + Link(inputs=set(map(key_ref_to_entity_ref, link.inputs)), + activity=optional_key_ref_to_entity_ref(link.activity), + outputs=set(map(key_ref_to_entity_ref, link.outputs))) + for link in links ) - def _add_entity(self, - *, - manifest_entry: MutableJSON, - metadata: AnyMutableJSON - ) -> None: - name = manifest_entry['name'] - assert name not in self.metadata_files, name - self.manifest.append(manifest_entry) - self.metadata_files[name] = metadata - def _parse_drs_uri(self, file_ref: Optional[str]) -> Optional[str]: if file_ref is None: return 
None @@ -326,10 +253,10 @@ def _primary_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle: bundle_entity = self._bundle_entity(bundle_fqid) keys: MutableKeys = {bundle_entity} - links: Links = set() + links: KeyLinks = set() for method in [self._follow_downstream, self._follow_upstream]: - method: Callable[[TDRSourceSpec, KeysByType], Links] + method: Callable[[TDRSourceSpec, KeysByType], KeyLinks] n = len(keys) frontier: Keys = keys while frontier: @@ -348,7 +275,7 @@ def _primary_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle: len(keys), bundle_fqid.uuid, arg) self._simplify_links(links) - result = TDRAnvilBundle(fqid=bundle_fqid, manifest=[], metadata_files={}) + result = TDRAnvilBundle(fqid=bundle_fqid) entities_by_key: dict[KeyReference, EntityReference] = {} for entity_type, typed_keys in sorted(keys_by_type.items()): pk_column = entity_type + '_id' @@ -367,7 +294,7 @@ def _supplementary_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle: self.datarepo_row_uuid_version) source = bundle_fqid.source.spec bundle_entity_type = bundle_fqid.entity_type.value - result = TDRAnvilBundle(fqid=bundle_fqid, manifest=[], metadata_files={}) + result = TDRAnvilBundle(fqid=bundle_fqid) columns = self._columns(bundle_entity_type) bundle_entity = dict(one(self._run_sql(f''' SELECT {', '.join(sorted(columns))} @@ -390,8 +317,8 @@ def _supplementary_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle: key_ref = KeyReference(key=row[entity_type + '_id'], entity_type=entity_type) entities_by_key[key_ref] = entity_ref result.add_entity(entity_ref, self._version, row) - link_args[arg] = key_ref - result.add_links({Link.create(**link_args)}, entities_by_key) + link_args[arg] = {key_ref} + result.add_links({Link(**link_args)}, entities_by_key) return result def _bundle_entity(self, bundle_fqid: AnvilBundleFQID) -> KeyReference: @@ -423,8 +350,8 @@ def _consolidate_by_type(self, entities: Keys) -> MutableKeysByType: 
result[e.entity_type].add(e.key) return result - def _simplify_links(self, links: Links) -> None: - grouped_links: Mapping[KeyReference, Links] = defaultdict(set) + def _simplify_links(self, links: KeyLinks) -> None: + grouped_links: Mapping[KeyReference, KeyLinks] = defaultdict(set) for link in links: grouped_links[link.activity].add(link) for activity, convergent_links in grouped_links.items(): @@ -435,7 +362,7 @@ def _simplify_links(self, links: Links) -> None: def _follow_upstream(self, source: TDRSourceSpec, entities: KeysByType - ) -> Links: + ) -> KeyLinks: return set.union( self._upstream_from_files(source, entities['file']), self._upstream_from_biosamples(source, entities['biosample']), @@ -471,7 +398,7 @@ def _follow_upstream(self, def _follow_downstream(self, source: TDRSourceSpec, entities: KeysByType - ) -> Links: + ) -> KeyLinks: return set.union( self._downstream_from_biosamples(source, entities['biosample']), self._downstream_from_files(source, entities['file']) @@ -480,24 +407,24 @@ def _follow_downstream(self, def _upstream_from_biosamples(self, source: TDRSourceSpec, biosample_ids: AbstractSet[Key] - ) -> Links: + ) -> KeyLinks: if biosample_ids: rows = self._run_sql(f''' SELECT b.biosample_id, b.donor_id, b.part_of_dataset_id FROM {backtick(self._full_table_name(source, 'biosample'))} AS b WHERE b.biosample_id IN ({', '.join(map(repr, biosample_ids))}) ''') - result: Links = set() + result: KeyLinks = set() for row in rows: downstream_ref = KeyReference(entity_type='biosample', key=row['biosample_id']) - result.add(Link.create(outputs=downstream_ref, - inputs=KeyReference(entity_type='dataset', - key=one(row['part_of_dataset_id'])))) + result.add(Link(outputs={downstream_ref}, + inputs={KeyReference(entity_type='dataset', + key=one(row['part_of_dataset_id']))})) for donor_id in row['donor_id']: - result.add(Link.create(outputs=downstream_ref, - inputs=KeyReference(entity_type='donor', - key=donor_id))) + result.add(Link(outputs={downstream_ref}, 
+ inputs={KeyReference(entity_type='donor', + key=donor_id)})) return result else: return set() @@ -505,7 +432,7 @@ def _upstream_from_biosamples(self, def _upstream_from_files(self, source: TDRSourceSpec, file_ids: AbstractSet[Key] - ) -> Links: + ) -> KeyLinks: if file_ids: rows = self._run_sql(f''' WITH file AS ( @@ -559,18 +486,18 @@ def _upstream_from_files(self, ON f.file_id IN UNNEST(a.generated_file_id) ''') return { - Link.create( + Link( activity=KeyReference(entity_type=row['activity_table'], key=row['activity_id']), # The generated link is not a complete representation of the # upstream activity because it does not include generated files # that are not ancestors of the downstream file - outputs=KeyReference(entity_type='file', key=row['generated_file_id']), - inputs=[ + outputs={KeyReference(entity_type='file', key=row['generated_file_id'])}, + inputs={ KeyReference(entity_type=entity_type, key=key) for entity_type, column in [('file', 'uses_file_id'), ('biosample', 'uses_biosample_id')] for key in row[column] - ] + } ) for row in rows } @@ -580,7 +507,7 @@ def _upstream_from_files(self, def _diagnoses_from_donors(self, source: TDRSourceSpec, donor_ids: AbstractSet[Key] - ) -> Links: + ) -> KeyLinks: if donor_ids: rows = self._run_sql(f''' SELECT dgn.donor_id, dgn.diagnosis_id @@ -588,9 +515,9 @@ def _diagnoses_from_donors(self, WHERE dgn.donor_id IN ({', '.join(map(repr, donor_ids))}) ''') return { - Link.create(inputs={KeyReference(key=row['diagnosis_id'], entity_type='diagnosis')}, - outputs={KeyReference(key=row['donor_id'], entity_type='donor')}, - activity=None) + Link(inputs={KeyReference(key=row['diagnosis_id'], entity_type='diagnosis')}, + outputs={KeyReference(key=row['donor_id'], entity_type='donor')}, + activity=None) for row in rows } else: @@ -599,7 +526,7 @@ def _diagnoses_from_donors(self, def _downstream_from_biosamples(self, source: TDRSourceSpec, biosample_ids: AbstractSet[Key], - ) -> Links: + ) -> KeyLinks: if biosample_ids: rows = 
self._run_sql(f''' WITH activities AS ( @@ -633,12 +560,12 @@ def _downstream_from_biosamples(self, WHERE biosample_id IN ({', '.join(map(repr, biosample_ids))}) ''') return { - Link.create(inputs={KeyReference(key=row['biosample_id'], entity_type='biosample')}, - outputs=[ - KeyReference(key=output_id, entity_type='file') - for output_id in row['generated_file_id'] - ], - activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) + Link(inputs={KeyReference(key=row['biosample_id'], entity_type='biosample')}, + outputs={ + KeyReference(key=output_id, entity_type='file') + for output_id in row['generated_file_id'] + }, + activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) for row in rows } else: @@ -647,7 +574,7 @@ def _downstream_from_biosamples(self, def _downstream_from_files(self, source: TDRSourceSpec, file_ids: AbstractSet[Key] - ) -> Links: + ) -> KeyLinks: if file_ids: rows = self._run_sql(f''' WITH activities AS ( @@ -679,12 +606,12 @@ def _downstream_from_files(self, WHERE used_file_id IN ({', '.join(map(repr, file_ids))}) ''') return { - Link.create(inputs=KeyReference(key=row['used_file_id'], entity_type='file'), - outputs=[ - KeyReference(key=file_id, entity_type='file') - for file_id in row['generated_file_id'] - ], - activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) + Link(inputs={KeyReference(key=row['used_file_id'], entity_type='file')}, + outputs={ + KeyReference(key=file_id, entity_type='file') + for file_id in row['generated_file_id'] + }, + activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) for row in rows } else: diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py index 67e78245b..759e97150 100644 --- a/src/azul/plugins/repository/tdr_hca/__init__.py +++ b/src/azul/plugins/repository/tdr_hca/__init__.py @@ -50,6 +50,9 @@ EntityReference, EntityType, ) +from 
azul.plugins.hca import ( + HCABundle, +) from azul.plugins.repository.tdr import ( TDRBundle, TDRBundleFQID, @@ -172,7 +175,7 @@ def extract_field(field: attr.Attribute) -> tuple[str, Any]: return cls(**dict(map(extract_field, attr.fields(cls)))) -class TDRHCABundle(TDRBundle): +class TDRHCABundle(HCABundle[TDRBundleFQID], TDRBundle): def add_entity(self, *, @@ -224,6 +227,8 @@ def add_entity(self, 'project_id' } + _suffix = 'tdr.' + def _add_manifest_entry(self, *, name: str, diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json index de931a713..0fa4c500a 100644 --- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json +++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json @@ -142,7 +142,7 @@ "donors": [ { "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" @@ -151,7 +151,7 @@ "1e2bd7e5-f45e-a391-daea-7c060be76acd" ], "organism_type": [ - "~null" + "redacted-ACw+6ecI" ], "phenotypic_sex": [ "redacted-JfQ0b3xG" @@ -167,7 +167,7 @@ "files": [ { "document_id": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "source_datarepo_row_ids": [ "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" @@ -203,7 +203,7 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "size": [ 213021639 @@ -212,7 +212,7 @@ 213021639 ], "name": [ - "file_1e269f04-4347-4188-b060-1dcc69e71d67" + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" ], "crc32": [ "" @@ -221,7 +221,7 @@ "" ], "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" ], "count": 1 } @@ -375,12 +375,12 @@ ], "donors": [ { - 
"document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -392,7 +392,7 @@ ], "files": [ { - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "source_datarepo_row_ids": [ "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], @@ -410,13 +410,13 @@ "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "size": 213021639, "size_": 213021639, - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" } ] }, @@ -431,26 +431,23 @@ } }, { - "_index": "azul_v2_nadove4_test_datasets_aggregate", + "_index": "azul_v2_nadove4_test_files_aggregate", "_type": "_doc", - "_id": "2370f948-2783-4eb6-afea-e022897f4dcf", + "_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "_score": 1.0, "_source": { - "entity_id": "2370f948-2783-4eb6-afea-e022897f4dcf", + "entity_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "contents": { "activities": [ { "document_id": [ - "1509ef40-d1ba-440d-b298-16b7c173dcd4", - "816e364e-1193-4e5b-a91a-14e4b009157c" + "1509ef40-d1ba-440d-b298-16b7c173dcd4" ], "source_datarepo_row_ids": [ - "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b", "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": [ - 
"18b3be87-e26b-4376-0d8d-c1e370e90e07", - "a60c5138-3749-f7cb-8714-52d389ad5231" + "18b3be87-e26b-4376-0d8d-c1e370e90e07" ], "activity_table": [ "sequencingactivity" @@ -508,11 +505,15 @@ ], "datasets": [ { - "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf", + "document_id": [ + "2370f948-2783-4eb6-afea-e022897f4dcf" + ], "source_datarepo_row_ids": [ "workspace_attributes:7a22b629-9d81-4e4d-9297-f9e44ed760bc" ], - "dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", + "dataset_id": [ + "52ee7665-7033-63f2-a8d9-ce8e32666739" + ], "consent_group": [ "DS-BDIS" ], @@ -528,7 +529,9 @@ "registered_identifier": [ "phs000693" ], - "title": "ANVIL_CMG_UWASH_DS_BDIS", + "title": [ + "ANVIL_CMG_UWASH_DS_BDIS" + ], "data_modality": [ "~null" ] @@ -580,7 +583,7 @@ "donors": [ { "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" @@ -589,7 +592,7 @@ "1e2bd7e5-f45e-a391-daea-7c060be76acd" ], "organism_type": [ - "~null" + "redacted-ACw+6ecI" ], "phenotypic_sex": [ "redacted-JfQ0b3xG" @@ -604,124 +607,31 @@ ], "files": [ { - "document_id": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" - ], - "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" - ], - "file_id": [ - "8b722e88-8103-49c1-b351-e64fa7c6ab37" - ], - "data_modality": [ - "~null" - ], - "file_format": [ - ".bam" - ], - "file_size": [ - 3306845592 - ], - "file_size_": [ - 3306845592 - ], - "file_md5sum": [ - "fNn9e1SovzgOROk3BvH6LQ==" - ], - "reference_assembly": [ - "~null" - ], - "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.bam" - ], - "is_supplementary": [ - 0 - ], - "version": [ - "2022-06-01T00:00:00.000000Z" - ], - "uuid": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" - ], - "size": [ - 3306845592 - ], - "size_": [ - 3306845592 - ], - "name": [ - "file_8b722e88-8103-49c1-b351-e64fa7c6ab37" - ], - "crc32": [ - "" - ], - "sha256": [ - "" - ], 
- "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" - ], - "count": 1 - }, - { - "document_id": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" - ], + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "source_datarepo_row_ids": [ "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], - "file_id": [ - "1e269f04-4347-4188-b060-1dcc69e71d67" - ], + "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", "data_modality": [ "~null" ], - "file_format": [ - ".vcf.gz" - ], - "file_size": [ - 213021639 - ], - "file_size_": [ - 213021639 - ], - "file_md5sum": [ - "vuxgbuCqKZ/fkT9CWTFmIg==" - ], + "file_format": ".vcf.gz", + "file_size": 213021639, + "file_size_": 213021639, + "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", "reference_assembly": [ "~null" ], - "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" - ], - "is_supplementary": [ - 0 - ], - "version": [ - "2022-06-01T00:00:00.000000Z" - ], - "uuid": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" - ], - "size": [ - 213021639 - ], - "size_": [ - 213021639 - ], - "name": [ - "file_1e269f04-4347-4188-b060-1dcc69e71d67" - ], - "crc32": [ - "" - ], - "sha256": [ - "" - ], - "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" - ], - "count": 1 + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "is_supplementary": 0, + "version": "2022-06-01T00:00:00.000000Z", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "size": 213021639, + "size_": 213021639, + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "crc32": "", + "sha256": "", + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" } ] }, @@ -741,12 +651,12 @@ } }, { - "_index": "azul_v2_nadove4_test_datasets", + "_index": "azul_v2_nadove4_test_files", "_type": "_doc", - "_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": 
"15b76f9c-6b46-433f-851d-34e89f1b9ba6_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "2370f948-2783-4eb6-afea-e022897f4dcf", + "entity_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "contents": { "activities": [ { @@ -767,25 +677,6 @@ "~null" ], "date_created": "9999-01-01T00:00:00.000000Z" - }, - { - "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c", - "source_datarepo_row_ids": [ - "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" - ], - "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", - "activity_type": "Sequencing", - "assay_type": [ - "~null" - ], - "data_modality": [ - "~null" - ], - "reference_assembly": [ - "~null" - ], - "date_created": "9999-01-01T00:00:00.000000Z" } ], "biosamples": [ @@ -892,12 +783,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -909,34 +800,7 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", - "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" - ], - "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", - "data_modality": [ - "~null" - ], - "file_format": ".bam", - "file_size": 3306845592, - "file_size_": 3306845592, - "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", - "reference_assembly": [ - "~null" - ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", - "is_supplementary": 0, - "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", - "size": 3306845592, - "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", - "crc32": "", - "sha256": "", - 
"drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" - }, - { - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "source_datarepo_row_ids": [ "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], @@ -954,17 +818,17 @@ "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "size": 213021639, "size_": 213021639, - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" } ] }, - "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" @@ -975,12 +839,12 @@ } }, { - "_index": "azul_v2_nadove4_test_donors_aggregate", + "_index": "azul_v2_nadove4_test_datasets_aggregate", "_type": "_doc", - "_id": "6708dc44-5232-44ca-a050-5dee58637284", + "_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "_score": 1.0, "_source": { - "entity_id": "6708dc44-5232-44ca-a050-5dee58637284", + "entity_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "contents": { "activities": [ { @@ -1052,15 +916,11 @@ ], "datasets": [ { - "document_id": [ - "2370f948-2783-4eb6-afea-e022897f4dcf" - ], + "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "source_datarepo_row_ids": [ "workspace_attributes:7a22b629-9d81-4e4d-9297-f9e44ed760bc" ], - "dataset_id": [ - "52ee7665-7033-63f2-a8d9-ce8e32666739" - ], + 
"dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", "consent_group": [ "DS-BDIS" ], @@ -1076,9 +936,7 @@ "registered_identifier": [ "phs000693" ], - "title": [ - "ANVIL_CMG_UWASH_DS_BDIS" - ], + "title": "ANVIL_CMG_UWASH_DS_BDIS", "data_modality": [ "~null" ] @@ -1129,13 +987,21 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": [ + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" + ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], - "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", - "phenotypic_sex": "redacted-JfQ0b3xG", + "donor_id": [ + "1e2bd7e5-f45e-a391-daea-7c060be76acd" + ], + "organism_type": [ + "redacted-ACw+6ecI" + ], + "phenotypic_sex": [ + "redacted-JfQ0b3xG" + ], "reported_ethnicity": [ "redacted-NSkwDycK" ], @@ -1147,34 +1013,34 @@ "files": [ { "document_id": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], "file_id": [ - "8b722e88-8103-49c1-b351-e64fa7c6ab37" + "1e269f04-4347-4188-b060-1dcc69e71d67" ], "data_modality": [ "~null" ], "file_format": [ - ".bam" + ".vcf.gz" ], "file_size": [ - 3306845592 + 213021639 ], "file_size_": [ - 3306845592 + 213021639 ], "file_md5sum": [ - "fNn9e1SovzgOROk3BvH6LQ==" + "vuxgbuCqKZ/fkT9CWTFmIg==" ], "reference_assembly": [ "~null" ], "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.bam" + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" ], "is_supplementary": [ 0 @@ -1183,16 +1049,16 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "size": [ - 3306845592 + 213021639 ], "size_": [ - 3306845592 + 213021639 ], "name": [ - "file_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" ], 
"crc32": [ "" @@ -1201,40 +1067,40 @@ "" ], "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" ], "count": 1 }, { "document_id": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], "file_id": [ - "1e269f04-4347-4188-b060-1dcc69e71d67" + "8b722e88-8103-49c1-b351-e64fa7c6ab37" ], "data_modality": [ "~null" ], "file_format": [ - ".vcf.gz" + ".bam" ], "file_size": [ - 213021639 + 3306845592 ], "file_size_": [ - 213021639 + 3306845592 ], "file_md5sum": [ - "vuxgbuCqKZ/fkT9CWTFmIg==" + "fNn9e1SovzgOROk3BvH6LQ==" ], "reference_assembly": [ "~null" ], "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" + "307500.merged.matefixed.sorted.markeddups.recal.bam" ], "is_supplementary": [ 0 @@ -1243,16 +1109,16 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "size": [ - 213021639 + 3306845592 ], "size_": [ - 213021639 + 3306845592 ], "name": [ - "file_1e269f04-4347-4188-b060-1dcc69e71d67" + "307500.merged.matefixed.sorted.markeddups.recal.bam" ], "crc32": [ "" @@ -1261,7 +1127,7 @@ "" ], "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" ], "count": 1 } @@ -1283,12 +1149,12 @@ } }, { - "_index": "azul_v2_nadove4_test_donors", + "_index": "azul_v2_nadove4_test_datasets", "_type": "_doc", - "_id": "6708dc44-5232-44ca-a050-5dee58637284_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "6708dc44-5232-44ca-a050-5dee58637284", + 
"entity_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "contents": { "activities": [ { @@ -1434,12 +1300,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -1451,62 +1317,62 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], - "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", + "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", "data_modality": [ "~null" ], - "file_format": ".bam", - "file_size": 3306845592, - "file_size_": 3306845592, - "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", + "file_format": ".vcf.gz", + "file_size": 213021639, + "file_size_": 213021639, + "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", "reference_assembly": [ "~null" ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", - "size": 3306845592, - "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "size": 213021639, + "size_": 213021639, + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" }, { - "document_id": 
"6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], - "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", + "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", "data_modality": [ "~null" ], - "file_format": ".vcf.gz", - "file_size": 213021639, - "file_size_": 213021639, - "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "file_format": ".bam", + "file_size": 3306845592, + "file_size_": 3306845592, + "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", "reference_assembly": [ "~null" ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "size": 213021639, - "size_": 213021639, - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", + "size": 3306845592, + "size_": 3306845592, + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, - "document_id": "6708dc44-5232-44ca-a050-5dee58637284_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" @@ -1519,10 +1385,10 @@ { "_index": "azul_v2_nadove4_test_files_aggregate", "_type": "_doc", - "_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "_score": 1.0, "_source": { - "entity_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "entity_id": 
"3b17377b-16b1-431c-9967-e5d01fc5923f", "contents": { "activities": [ { @@ -1669,7 +1535,7 @@ "donors": [ { "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" @@ -1678,7 +1544,7 @@ "1e2bd7e5-f45e-a391-daea-7c060be76acd" ], "organism_type": [ - "~null" + "redacted-ACw+6ecI" ], "phenotypic_sex": [ "redacted-JfQ0b3xG" @@ -1693,7 +1559,7 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], @@ -1711,13 +1577,13 @@ "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", "size": 3306845592, "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, @@ -1739,10 +1605,10 @@ { "_index": "azul_v2_nadove4_test_files", "_type": "_doc", - "_id": "677f207e-2d12-4eca-8f7a-039325af91ad_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": "3b17377b-16b1-431c-9967-e5d01fc5923f_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "entity_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "contents": { "activities": [ { @@ -1869,12 +1735,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], 
"donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -1886,7 +1752,7 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], @@ -1904,17 +1770,17 @@ "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", "size": 3306845592, "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" @@ -1925,30 +1791,22 @@ } }, { - "_index": "azul_v2_nadove4_test_files_aggregate", + "_index": "azul_v2_nadove4_test_activities_aggregate", "_type": "_doc", - "_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "_score": 1.0, "_source": { - "entity_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "entity_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "contents": { "activities": [ { - "document_id": [ - "1509ef40-d1ba-440d-b298-16b7c173dcd4" - ], + "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "source_datarepo_row_ids": [ - 
"sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" - ], - "activity_id": [ - "18b3be87-e26b-4376-0d8d-c1e370e90e07" - ], - "activity_table": [ - "sequencingactivity" - ], - "activity_type": [ - "Sequencing" + "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], + "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", + "activity_table": "sequencingactivity", + "activity_type": "Sequencing", "assay_type": [ "~null" ], @@ -1958,9 +1816,7 @@ "reference_assembly": [ "~null" ], - "date_created": [ - "9999-01-01T00:00:00.000000Z" - ] + "date_created": "9999-01-01T00:00:00.000000Z" } ], "biosamples": [ @@ -2077,7 +1933,7 @@ "donors": [ { "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" @@ -2086,7 +1942,7 @@ "1e2bd7e5-f45e-a391-daea-7c060be76acd" ], "organism_type": [ - "~null" + "redacted-ACw+6ecI" ], "phenotypic_sex": [ "redacted-JfQ0b3xG" @@ -2101,31 +1957,64 @@ ], "files": [ { - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": [ + "3b17377b-16b1-431c-9967-e5d01fc5923f" + ], "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + ], + "file_id": [ + "8b722e88-8103-49c1-b351-e64fa7c6ab37" ], - "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", "data_modality": [ "~null" ], - "file_format": ".vcf.gz", - "file_size": 213021639, - "file_size_": 213021639, - "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "file_format": [ + ".bam" + ], + "file_size": [ + 3306845592 + ], + "file_size_": [ + 3306845592 + ], + "file_md5sum": [ + "fNn9e1SovzgOROk3BvH6LQ==" + ], "reference_assembly": [ "~null" ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", - "is_supplementary": 0, - "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "size": 213021639, - "size_": 213021639, - "name": 
"file_1e269f04-4347-4188-b060-1dcc69e71d67", - "crc32": "", - "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "file_name": [ + "307500.merged.matefixed.sorted.markeddups.recal.bam" + ], + "is_supplementary": [ + 0 + ], + "version": [ + "2022-06-01T00:00:00.000000Z" + ], + "uuid": [ + "3b17377b-16b1-431c-9967-e5d01fc5923f" + ], + "size": [ + 3306845592 + ], + "size_": [ + 3306845592 + ], + "name": [ + "307500.merged.matefixed.sorted.markeddups.recal.bam" + ], + "crc32": [ + "" + ], + "sha256": [ + "" + ], + "drs_path": [ + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" + ], + "count": 1 } ] }, @@ -2145,20 +2034,20 @@ } }, { - "_index": "azul_v2_nadove4_test_files", + "_index": "azul_v2_nadove4_test_activities", "_type": "_doc", - "_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "entity_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "contents": { "activities": [ { - "document_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4", + "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "source_datarepo_row_ids": [ - "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" + "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], - "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", + "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", "activity_table": "sequencingactivity", "activity_type": "Sequencing", "assay_type": [ @@ -2277,12 +2166,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", 
"phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -2294,35 +2183,35 @@ ], "files": [ { - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], - "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", + "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", "data_modality": [ "~null" ], - "file_format": ".vcf.gz", - "file_size": 213021639, - "file_size_": 213021639, - "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "file_format": ".bam", + "file_size": 3306845592, + "file_size_": 3306845592, + "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", "reference_assembly": [ "~null" ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "size": 213021639, - "size_": 213021639, - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", + "size": 3306845592, + "size_": 3306845592, + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" @@ -2333,22 +2222,33 @@ } }, { - "_index": "azul_v2_nadove4_test_activities_aggregate", + "_index": "azul_v2_nadove4_test_biosamples_aggregate", 
"_type": "_doc", - "_id": "816e364e-1193-4e5b-a91a-14e4b009157c", + "_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", "_score": 1.0, "_source": { - "entity_id": "816e364e-1193-4e5b-a91a-14e4b009157c", + "entity_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", "contents": { "activities": [ { - "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c", + "document_id": [ + "1509ef40-d1ba-440d-b298-16b7c173dcd4", + "816e364e-1193-4e5b-a91a-14e4b009157c" + ], "source_datarepo_row_ids": [ - "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" + "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b", + "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" + ], + "activity_id": [ + "18b3be87-e26b-4376-0d8d-c1e370e90e07", + "a60c5138-3749-f7cb-8714-52d389ad5231" + ], + "activity_table": [ + "sequencingactivity" + ], + "activity_type": [ + "Sequencing" ], - "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", - "activity_type": "Sequencing", "assay_type": [ "~null" ], @@ -2358,41 +2258,29 @@ "reference_assembly": [ "~null" ], - "date_created": "9999-01-01T00:00:00.000000Z" + "date_created": [ + "9999-01-01T00:00:00.000000Z" + ] } ], "biosamples": [ { - "document_id": [ - "826dea02-e274-4ffe-aabc-eb3db63ad068" - ], + "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", "source_datarepo_row_ids": [ "sample:98048c3b-2525-4090-94fd-477de31f2608" ], - "biosample_id": [ - "f9d40cf6-37b8-22f3-ce35-0dc614d2452b" - ], - "anatomical_site": [ - "~null" - ], + "biosample_id": "f9d40cf6-37b8-22f3-ce35-0dc614d2452b", + "anatomical_site": "~null", "apriori_cell_type": [ "~null" ], - "biosample_type": [ - "~null" - ], - "disease": [ - "~null" - ], - "donor_age_at_collection_unit": [ - "~null" - ], - "donor_age_at_collection": [ - { - "gte": null, - "lte": null - } - ] + "biosample_type": "~null", + "disease": "~null", + "donor_age_at_collection_unit": "~null", + "donor_age_at_collection": { + "gte": null, + "lte": null + } } ], "datasets": [ @@ -2475,7 +2363,7 @@ 
"donors": [ { "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" + "bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" @@ -2484,7 +2372,7 @@ "1e2bd7e5-f45e-a391-daea-7c060be76acd" ], "organism_type": [ - "~null" + "redacted-ACw+6ecI" ], "phenotypic_sex": [ "redacted-JfQ0b3xG" @@ -2500,7 +2388,67 @@ "files": [ { "document_id": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" + ], + "source_datarepo_row_ids": [ + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + ], + "file_id": [ + "1e269f04-4347-4188-b060-1dcc69e71d67" + ], + "data_modality": [ + "~null" + ], + "file_format": [ + ".vcf.gz" + ], + "file_size": [ + 213021639 + ], + "file_size_": [ + 213021639 + ], + "file_md5sum": [ + "vuxgbuCqKZ/fkT9CWTFmIg==" + ], + "reference_assembly": [ + "~null" + ], + "file_name": [ + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" + ], + "is_supplementary": [ + 0 + ], + "version": [ + "2022-06-01T00:00:00.000000Z" + ], + "uuid": [ + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" + ], + "size": [ + 213021639 + ], + "size_": [ + 213021639 + ], + "name": [ + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" + ], + "crc32": [ + "" + ], + "sha256": [ + "" + ], + "drs_path": [ + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" + ], + "count": 1 + }, + { + "document_id": [ + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "source_datarepo_row_ids": [ "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" @@ -2536,7 +2484,7 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "size": [ 3306845592 @@ -2545,7 +2493,7 @@ 3306845592 ], "name": [ - "file_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "307500.merged.matefixed.sorted.markeddups.recal.bam" ], "crc32": [ "" @@ -2554,7 +2502,7 @@ "" ], "drs_path": [ - 
"v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" ], "count": 1 } @@ -2576,14 +2524,33 @@ } }, { - "_index": "azul_v2_nadove4_test_activities", + "_index": "azul_v2_nadove4_test_biosamples", "_type": "_doc", - "_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "816e364e-1193-4e5b-a91a-14e4b009157c", + "entity_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", "contents": { "activities": [ + { + "document_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4", + "source_datarepo_row_ids": [ + "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" + ], + "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", + "activity_table": "sequencingactivity", + "activity_type": "Sequencing", + "assay_type": [ + "~null" + ], + "data_modality": [ + "~null" + ], + "reference_assembly": [ + "~null" + ], + "date_created": "9999-01-01T00:00:00.000000Z" + }, { "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c", "source_datarepo_row_ids": [ @@ -2708,12 +2675,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -2725,7 +2692,34 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "source_datarepo_row_ids": [ + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + ], + "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", + "data_modality": [ + "~null" + ], + "file_format": 
".vcf.gz", + "file_size": 213021639, + "file_size_": 213021639, + "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "reference_assembly": [ + "~null" + ], + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "is_supplementary": 0, + "version": "2022-06-01T00:00:00.000000Z", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "size": 213021639, + "size_": 213021639, + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "crc32": "", + "sha256": "", + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" + }, + { + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], @@ -2743,17 +2737,17 @@ "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", "size": 3306845592, "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, - "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" @@ -2764,12 +2758,12 @@ } }, { - "_index": "azul_v2_nadove4_test_biosamples_aggregate", + "_index": "azul_v2_nadove4_test_donors_aggregate", "_type": "_doc", - "_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", + "_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "_score": 1.0, "_source": { - "entity_id": 
"826dea02-e274-4ffe-aabc-eb3db63ad068", + "entity_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "contents": { "activities": [ { @@ -2807,22 +2801,36 @@ ], "biosamples": [ { - "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", + "document_id": [ + "826dea02-e274-4ffe-aabc-eb3db63ad068" + ], "source_datarepo_row_ids": [ "sample:98048c3b-2525-4090-94fd-477de31f2608" ], - "biosample_id": "f9d40cf6-37b8-22f3-ce35-0dc614d2452b", - "anatomical_site": "~null", + "biosample_id": [ + "f9d40cf6-37b8-22f3-ce35-0dc614d2452b" + ], + "anatomical_site": [ + "~null" + ], "apriori_cell_type": [ "~null" ], - "biosample_type": "~null", - "disease": "~null", - "donor_age_at_collection_unit": "~null", - "donor_age_at_collection": { - "gte": null, - "lte": null - } + "biosample_type": [ + "~null" + ], + "disease": [ + "~null" + ], + "donor_age_at_collection_unit": [ + "~null" + ], + "donor_age_at_collection": [ + { + "gte": null, + "lte": null + } + ] } ], "datasets": [ @@ -2904,21 +2912,13 @@ ], "donors": [ { - "document_id": [ - "6708dc44-5232-44ca-a050-5dee58637284" - ], + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], - "donor_id": [ - "1e2bd7e5-f45e-a391-daea-7c060be76acd" - ], - "organism_type": [ - "~null" - ], - "phenotypic_sex": [ - "redacted-JfQ0b3xG" - ], + "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", + "organism_type": "redacted-ACw+6ecI", + "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" ], @@ -2930,34 +2930,34 @@ "files": [ { "document_id": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], "file_id": [ - "8b722e88-8103-49c1-b351-e64fa7c6ab37" + "1e269f04-4347-4188-b060-1dcc69e71d67" ], "data_modality": [ "~null" ], "file_format": [ - ".bam" + ".vcf.gz" ], 
"file_size": [ - 3306845592 + 213021639 ], "file_size_": [ - 3306845592 + 213021639 ], "file_md5sum": [ - "fNn9e1SovzgOROk3BvH6LQ==" + "vuxgbuCqKZ/fkT9CWTFmIg==" ], "reference_assembly": [ "~null" ], "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.bam" + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" ], "is_supplementary": [ 0 @@ -2966,16 +2966,16 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "677f207e-2d12-4eca-8f7a-039325af91ad" + "15b76f9c-6b46-433f-851d-34e89f1b9ba6" ], "size": [ - 3306845592 + 213021639 ], "size_": [ - 3306845592 + 213021639 ], "name": [ - "file_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" ], "crc32": [ "" @@ -2984,40 +2984,40 @@ "" ], "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" ], "count": 1 }, { "document_id": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], "file_id": [ - "1e269f04-4347-4188-b060-1dcc69e71d67" + "8b722e88-8103-49c1-b351-e64fa7c6ab37" ], "data_modality": [ "~null" ], "file_format": [ - ".vcf.gz" + ".bam" ], "file_size": [ - 213021639 + 3306845592 ], "file_size_": [ - 213021639 + 3306845592 ], "file_md5sum": [ - "vuxgbuCqKZ/fkT9CWTFmIg==" + "fNn9e1SovzgOROk3BvH6LQ==" ], "reference_assembly": [ "~null" ], "file_name": [ - "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz" + "307500.merged.matefixed.sorted.markeddups.recal.bam" ], "is_supplementary": [ 0 @@ -3026,16 +3026,16 @@ "2022-06-01T00:00:00.000000Z" ], "uuid": [ - "6a85e0ab-2386-4f7e-8503-d72d90b4bc47" + "3b17377b-16b1-431c-9967-e5d01fc5923f" ], "size": [ - 213021639 + 3306845592 ], "size_": [ - 213021639 + 3306845592 ], "name": [ - 
"file_1e269f04-4347-4188-b060-1dcc69e71d67" + "307500.merged.matefixed.sorted.markeddups.recal.bam" ], "crc32": [ "" @@ -3044,7 +3044,7 @@ "" ], "drs_path": [ - "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" ], "count": 1 } @@ -3066,12 +3066,12 @@ } }, { - "_index": "azul_v2_nadove4_test_biosamples", + "_index": "azul_v2_nadove4_test_donors", "_type": "_doc", - "_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e_826dea02-e274-affe-aabc-eb3db63ad068__exists", "_score": 1.0, "_source": { - "entity_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", + "entity_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "contents": { "activities": [ { @@ -3217,12 +3217,12 @@ ], "donors": [ { - "document_id": "6708dc44-5232-44ca-a050-5dee58637284", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "source_datarepo_row_ids": [ "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" ], "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "organism_type": "~null", + "organism_type": "redacted-ACw+6ecI", "phenotypic_sex": "redacted-JfQ0b3xG", "reported_ethnicity": [ "redacted-NSkwDycK" @@ -3234,62 +3234,62 @@ ], "files": [ { - "document_id": "677f207e-2d12-4eca-8f7a-039325af91ad", + "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" ], - "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", + "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", "data_modality": [ "~null" ], - "file_format": ".bam", - "file_size": 3306845592, - "file_size_": 3306845592, - "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", + "file_format": ".vcf.gz", + "file_size": 213021639, + "file_size_": 213021639, + "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", "reference_assembly": [ "~null" ], - 
"file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", - "size": 3306845592, - "size_": 3306845592, - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "uuid": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "size": 213021639, + "size_": 213021639, + "name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", "crc32": "", "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67" }, { - "document_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", + "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" ], - "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", + "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", "data_modality": [ "~null" ], - "file_format": ".vcf.gz", - "file_size": 213021639, - "file_size_": 213021639, - "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "file_format": ".bam", + "file_size": 3306845592, + "file_size_": 3306845592, + "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", "reference_assembly": [ "~null" ], - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "is_supplementary": 0, "version": "2022-06-01T00:00:00.000000Z", - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "size": 213021639, - "size_": 213021639, - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", + "uuid": "3b17377b-16b1-431c-9967-e5d01fc5923f", + "size": 3306845592, + "size_": 3306845592, + "name": "307500.merged.matefixed.sorted.markeddups.recal.bam", "crc32": "", "sha256": "", - "drs_path": 
"v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37" } ] }, - "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068__exists", + "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e_826dea02-e274-affe-aabc-eb3db63ad068__exists", "source": { "id": "cafebabe-feed-4bad-dead-beaf8badf00d", "spec": "tdr:test_project:snapshot/snapshot:/2" diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json new file mode 100644 index 000000000..a943847f1 --- /dev/null +++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json @@ -0,0 +1,214 @@ +{ + "entities": { + "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068": { + "anatomical_site": null, + "apriori_cell_type": [], + "biosample_id": "f9d40cf6-37b8-22f3-ce35-0dc614d2452b", + "biosample_type": null, + "datarepo_row_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", + "disease": null, + "donor_age_at_collection_lower_bound": null, + "donor_age_at_collection_unit": null, + "donor_age_at_collection_upper_bound": null, + "source_datarepo_row_ids": [ + "sample:98048c3b-2525-4090-94fd-477de31f2608" + ], + "version": "redacted-/JIktcFZ" + }, + "dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { + "consent_group": [ + "DS-BDIS" + ], + "data_modality": [], + "data_use_permission": [ + "DS-BDIS" + ], + "datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf", + "dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", + "owner": [ + "Debbie Nickerson" + ], + "principal_investigator": [], + "registered_identifier": [ + "phs000693" + ], + "source_datarepo_row_ids": [ + "workspace_attributes:7a22b629-9d81-4e4d-9297-f9e44ed760bc" + ], + "title": "ANVIL_CMG_UWASH_DS_BDIS", + "version": "2022-06-01T00:00:00.000000Z" + }, + "diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f": { + "datarepo_row_id": 
"15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", + "diagnosis_age_lower_bound": null, + "diagnosis_age_unit": null, + "diagnosis_age_upper_bound": null, + "diagnosis_id": "5ebe9bc4-a1be-0ddf-7277-b1e88276d0f6", + "disease": [ + "redacted-A61iJlLx" + ], + "onset_age_lower_bound": null, + "onset_age_unit": null, + "onset_age_upper_bound": null, + "phenopacket": [], + "phenotype": [ + "redacted-acSYHZUr" + ], + "source_datarepo_row_ids": [ + "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" + ], + "version": "redacted-/JIktcFZ" + }, + "diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461": { + "datarepo_row_id": "939a4bd3-86ed-4a8a-81f4-fbe0ee673461", + "diagnosis_age_lower_bound": null, + "diagnosis_age_unit": null, + "diagnosis_age_upper_bound": null, + "diagnosis_id": "25ff8d32-18c9-fc3e-020a-5de20d35d906", + "disease": [ + "redacted-g50ublm/" + ], + "onset_age_lower_bound": null, + "onset_age_unit": null, + "onset_age_upper_bound": null, + "phenopacket": [], + "phenotype": [ + "redacted-acSYHZUr" + ], + "source_datarepo_row_ids": [ + "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" + ], + "version": "redacted-/JIktcFZ" + }, + "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e": { + "datarepo_row_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", + "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", + "genetic_ancestry": [], + "organism_type": "redacted-ACw+6ecI", + "phenotypic_sex": "redacted-JfQ0b3xG", + "reported_ethnicity": [ + "redacted-NSkwDycK" + ], + "source_datarepo_row_ids": [ + "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" + ], + "version": "redacted-/JIktcFZ" + }, + "file/15b76f9c-6b46-433f-851d-34e89f1b9ba6": { + "data_modality": [], + "datarepo_row_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", + "file_format": ".vcf.gz", + "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", + "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", + "file_ref": 
"drs://data.terra.bio/v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67", + "file_size": 213021639, + "is_supplementary": false, + "reference_assembly": [], + "source_datarepo_row_ids": [ + "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" + ], + "version": "2022-06-01T00:00:00.000000Z", + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67", + "sha256": "", + "crc32": "" + }, + "file/3b17377b-16b1-431c-9967-e5d01fc5923f": { + "data_modality": [], + "datarepo_row_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", + "file_format": ".bam", + "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", + "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", + "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", + "file_ref": "drs://data.terra.bio/v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "file_size": 3306845592, + "is_supplementary": false, + "reference_assembly": [], + "source_datarepo_row_ids": [ + "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" + ], + "version": "2022-06-01T00:00:00.000000Z", + "drs_path": "v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37", + "sha256": "", + "crc32": "" + }, + "sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4": { + "activity_type": "Sequencing", + "assay_type": [], + "data_modality": [], + "datarepo_row_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4", + "sequencingactivity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", + "source_datarepo_row_ids": [ + "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" + ], + "version": "2022-06-01T00:00:00.000000Z" + }, + "sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c": { + "activity_type": "Sequencing", + "assay_type": [], + "data_modality": [], + "datarepo_row_id": "816e364e-1193-4e5b-a91a-14e4b009157c", + "sequencingactivity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", + "source_datarepo_row_ids": [ + "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" + ], + 
"version": "2022-06-01T00:00:00.000000Z" + } + }, + "links": [ + { + "inputs": [ + "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + ], + "activity": "sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c", + "outputs": [ + "file/3b17377b-16b1-431c-9967-e5d01fc5923f" + ] + }, + { + "inputs": [ + "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + ], + "activity": "sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4", + "outputs": [ + "file/15b76f9c-6b46-433f-851d-34e89f1b9ba6" + ] + }, + { + "inputs": [ + "dataset/2370f948-2783-4eb6-afea-e022897f4dcf" + ], + "activity": null, + "outputs": [ + "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + ] + }, + { + "inputs": [ + "diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f" + ], + "activity": null, + "outputs": [ + "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + ] + }, + { + "inputs": [ + "diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461" + ], + "activity": null, + "outputs": [ + "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + ] + }, + { + "inputs": [ + "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + ], + "activity": null, + "outputs": [ + "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + ] + } + ] +} \ No newline at end of file diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.json deleted file mode 100644 index 732ebe2be..000000000 --- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.json +++ /dev/null @@ -1,281 +0,0 @@ -{ - "manifest": [ - { - "uuid": "826dea02-e274-4ffe-aabc-eb3db63ad068", - "version": "2022-06-01T00:00:00.000000Z", - "name": "biosample_f9d40cf6-37b8-22f3-ce35-0dc614d2452b", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "2370f948-2783-4eb6-afea-e022897f4dcf", - "version": "2022-06-01T00:00:00.000000Z", - "name": "dataset_52ee7665-7033-63f2-a8d9-ce8e32666739", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "939a4bd3-86ed-4a8a-81f4-fbe0ee673461", - "version": 
"2022-06-01T00:00:00.000000Z", - "name": "diagnosis_25ff8d32-18c9-fc3e-020a-5de20d35d906", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", - "version": "2022-06-01T00:00:00.000000Z", - "name": "diagnosis_5ebe9bc4-a1be-0ddf-7277-b1e88276d0f6", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "6708dc44-5232-44ca-a050-5dee58637284", - "version": "2022-06-01T00:00:00.000000Z", - "name": "donor_1e2bd7e5-f45e-a391-daea-7c060be76acd", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "version": "2022-06-01T00:00:00.000000Z", - "name": "file_1e269f04-4347-4188-b060-1dcc69e71d67", - "indexed": true, - "crc32": "", - "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67" - }, - { - "uuid": "677f207e-2d12-4eca-8f7a-039325af91ad", - "version": "2022-06-01T00:00:00.000000Z", - "name": "file_8b722e88-8103-49c1-b351-e64fa7c6ab37", - "indexed": true, - "crc32": "", - "sha256": "", - "drs_path": "v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37" - }, - { - "uuid": "1509ef40-d1ba-440d-b298-16b7c173dcd4", - "version": "2022-06-01T00:00:00.000000Z", - "name": "sequencingactivity_18b3be87-e26b-4376-0d8d-c1e370e90e07", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "816e364e-1193-4e5b-a91a-14e4b009157c", - "version": "2022-06-01T00:00:00.000000Z", - "name": "sequencingactivity_a60c5138-3749-f7cb-8714-52d389ad5231", - "indexed": true, - "crc32": "", - "sha256": "" - }, - { - "uuid": "826dea02-e274-affe-aabc-eb3db63ad068", - "version": null, - "name": "links", - "indexed": true - } - ], - "metadata": { - "biosample_f9d40cf6-37b8-22f3-ce35-0dc614d2452b": { - "anatomical_site": null, - "apriori_cell_type": [], - "biosample_id": "f9d40cf6-37b8-22f3-ce35-0dc614d2452b", - "biosample_type": null, - "datarepo_row_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", - 
"disease": null, - "donor_age_at_collection_lower_bound": null, - "donor_age_at_collection_unit": null, - "donor_age_at_collection_upper_bound": null, - "source_datarepo_row_ids": [ - "sample:98048c3b-2525-4090-94fd-477de31f2608" - ] - }, - "dataset_52ee7665-7033-63f2-a8d9-ce8e32666739": { - "consent_group": [ - "DS-BDIS" - ], - "data_modality": [], - "data_use_permission": [ - "DS-BDIS" - ], - "datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf", - "dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", - "owner": [ - "Debbie Nickerson" - ], - "principal_investigator": [], - "registered_identifier": [ - "phs000693" - ], - "source_datarepo_row_ids": [ - "workspace_attributes:7a22b629-9d81-4e4d-9297-f9e44ed760bc" - ], - "title": "ANVIL_CMG_UWASH_DS_BDIS" - }, - "diagnosis_25ff8d32-18c9-fc3e-020a-5de20d35d906": { - "datarepo_row_id": "939a4bd3-86ed-4a8a-81f4-fbe0ee673461", - "diagnosis_age_lower_bound": null, - "diagnosis_age_unit": null, - "diagnosis_age_upper_bound": null, - "diagnosis_id": "25ff8d32-18c9-fc3e-020a-5de20d35d906", - "disease": [ - "redacted-g50ublm/" - ], - "onset_age_lower_bound": null, - "onset_age_unit": null, - "onset_age_upper_bound": null, - "phenopacket": [], - "phenotype": [ - "redacted-acSYHZUr" - ], - "source_datarepo_row_ids": [ - "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" - ] - }, - "diagnosis_5ebe9bc4-a1be-0ddf-7277-b1e88276d0f6": { - "datarepo_row_id": "15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", - "diagnosis_age_lower_bound": null, - "diagnosis_age_unit": null, - "diagnosis_age_upper_bound": null, - "diagnosis_id": "5ebe9bc4-a1be-0ddf-7277-b1e88276d0f6", - "disease": [ - "redacted-A61iJlLx" - ], - "onset_age_lower_bound": null, - "onset_age_unit": null, - "onset_age_upper_bound": null, - "phenopacket": [], - "phenotype": [ - "redacted-acSYHZUr" - ], - "source_datarepo_row_ids": [ - "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" - ] - }, - "donor_1e2bd7e5-f45e-a391-daea-7c060be76acd": { - "datarepo_row_id": 
"6708dc44-5232-44ca-a050-5dee58637284", - "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", - "genetic_ancestry": [], - "organism_type": null, - "phenotypic_sex": "redacted-JfQ0b3xG", - "reported_ethnicity": [ - "redacted-NSkwDycK" - ], - "source_datarepo_row_ids": [ - "subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef" - ] - }, - "file_1e269f04-4347-4188-b060-1dcc69e71d67": { - "data_modality": [], - "datarepo_row_id": "6a85e0ab-2386-4f7e-8503-d72d90b4bc47", - "file_format": ".vcf.gz", - "file_id": "1e269f04-4347-4188-b060-1dcc69e71d67", - "file_md5sum": "vuxgbuCqKZ/fkT9CWTFmIg==", - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz", - "file_ref": "drs://data.terra.bio/v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67", - "file_size": 213021639, - "reference_assembly": [], - "is_supplementary": false, - "source_datarepo_row_ids": [ - "file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0" - ] - }, - "file_8b722e88-8103-49c1-b351-e64fa7c6ab37": { - "data_modality": [], - "datarepo_row_id": "677f207e-2d12-4eca-8f7a-039325af91ad", - "file_format": ".bam", - "file_id": "8b722e88-8103-49c1-b351-e64fa7c6ab37", - "file_md5sum": "fNn9e1SovzgOROk3BvH6LQ==", - "file_name": "307500.merged.matefixed.sorted.markeddups.recal.bam", - "file_ref": "drs://data.terra.bio/v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37", - "file_size": 3306845592, - "reference_assembly": [], - "is_supplementary": false, - "source_datarepo_row_ids": [ - "file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2" - ] - }, - "sequencingactivity_18b3be87-e26b-4376-0d8d-c1e370e90e07": { - "activity_type": "Sequencing", - "assay_type": [], - "data_modality": [], - "datarepo_row_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4", - "sequencingactivity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "source_datarepo_row_ids": [ - "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" - ] - }, - "sequencingactivity_a60c5138-3749-f7cb-8714-52d389ad5231": { 
- "activity_type": "Sequencing", - "assay_type": [], - "data_modality": [], - "datarepo_row_id": "816e364e-1193-4e5b-a91a-14e4b009157c", - "sequencingactivity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "source_datarepo_row_ids": [ - "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" - ] - }, - "links": [ - { - "inputs": [ - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf" - ], - "activity": null, - "outputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" - ] - }, - { - "inputs": [ - "diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f" - ], - "activity": null, - "outputs": [ - "donor/6708dc44-5232-44ca-a050-5dee58637284" - ] - }, - { - "inputs": [ - "diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461" - ], - "activity": null, - "outputs": [ - "donor/6708dc44-5232-44ca-a050-5dee58637284" - ] - }, - { - "inputs": [ - "donor/6708dc44-5232-44ca-a050-5dee58637284" - ], - "activity": null, - "outputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" - ] - }, - { - "inputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" - ], - "activity": "sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4", - "outputs": [ - "file/6a85e0ab-2386-4f7e-8503-d72d90b4bc47" - ] - }, - { - "inputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" - ], - "activity": "sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c", - "outputs": [ - "file/677f207e-2d12-4eca-8f7a-039325af91ad" - ] - } - ] - } -} diff --git a/test/indexer/test_anvil.py b/test/indexer/test_anvil.py index 6c77568c9..f4cae3ab4 100644 --- a/test/indexer/test_anvil.py +++ b/test/indexer/test_anvil.py @@ -17,6 +17,9 @@ from azul.logging import ( configure_test_logging, ) +from azul.plugins.repository.tdr_anvil import ( + TDRAnvilBundle, +) from indexer import ( IndexerTestCase, ) @@ -78,9 +81,11 @@ def test_indexing(self): @unittest.skip('TinyQuery does not support the WITH clause') def test_fetch_bundle(self): canned_bundle = self._load_canned_bundle(self.bundle) + assert isinstance(canned_bundle, 
TDRAnvilBundle) self._make_mock_tdr_tables(self.bundle) plugin = self.plugin_for_source_spec(canned_bundle.fqid.source.spec) bundle = plugin.fetch_bundle(self.bundle) + assert isinstance(bundle, TDRAnvilBundle) self.assertEqual(canned_bundle.fqid, bundle.fqid) - self.assertEqual(canned_bundle.manifest, bundle.manifest) - self.assertEqual(canned_bundle.metadata_files, bundle.metadata_files) + self.assertEqual(canned_bundle.entities, bundle.entities) + self.assertEqual(canned_bundle.links, bundle.links) diff --git a/test/integration_test.py b/test/integration_test.py index 9cd8effc0..45b413e6e 100644 --- a/test/integration_test.py +++ b/test/integration_test.py @@ -111,6 +111,9 @@ SourceRef, SourcedBundleFQID, ) +from azul.indexer.document import ( + EntityReference, +) from azul.indexer.index_service import ( IndexExistsAndDiffersException, IndexService, @@ -127,6 +130,9 @@ MetadataPlugin, RepositoryPlugin, ) +from azul.plugins.anvil import ( + Link, +) from azul.plugins.repository.tdr import ( TDRSourceRef, ) @@ -1474,23 +1480,39 @@ def _test_catalog(self, catalog: config.Catalog): with open(os.path.join(d, generated_file)) as f: bundle_json = json.load(f) - self.assertEqual({'manifest', 'metadata'}, bundle_json.keys()) - manifest = bundle_json['manifest'] - metadata = bundle_json['metadata'] - self.assertIsInstance(manifest, list) - self.assertIsInstance(metadata, dict) - - manifest_files = sorted(e['name'] for e in manifest if e['indexed']) - metadata_files = sorted(metadata.keys()) - - if catalog.plugins['repository'].name == 'canned': - # FIXME: Manifest entry not generated for links.json by - # StagingArea.get_bundle - # https://github.com/DataBiosphere/hca-metadata-api/issues/52 - assert 'links.json' not in manifest_files - metadata_files.remove('links.json') - - self.assertListEqual(manifest_files, metadata_files) + metadata_plugin_name = catalog.plugins['metadata'].name + if metadata_plugin_name == 'hca': + self.assertEqual({'manifest', 'metadata'}, 
bundle_json.keys()) + manifest = bundle_json['manifest'] + metadata = bundle_json['metadata'] + self.assertIsInstance(manifest, list) + self.assertIsInstance(metadata, dict) + + manifest_files = sorted(e['name'] for e in manifest if e['indexed']) + metadata_files = sorted(metadata.keys()) + + if catalog.plugins['repository'].name == 'canned': + # FIXME: Manifest entry not generated for links.json by + # StagingArea.get_bundle + # https://github.com/DataBiosphere/hca-metadata-api/issues/52 + assert 'links.json' not in manifest_files + metadata_files.remove('links.json') + + self.assertListEqual(manifest_files, metadata_files) + elif metadata_plugin_name == 'anvil': + self.assertEqual({'entities', 'links'}, bundle_json.keys()) + entities = bundle_json['entities'] + links = bundle_json['links'] + self.assertIsInstance(entities, dict) + self.assertIsInstance(links, list) + entities = set(map(EntityReference.parse, entities.keys())) + linked_entities = frozenset.union(*( + Link.from_json(link).all_entities + for link in links + )) + self.assertEqual(entities, linked_entities) + else: + assert False, metadata_plugin_name def test_can_bundle_configured_catalogs(self): for catalog_name, catalog in config.catalogs.items(): diff --git a/test/service/test_response_anvil.py b/test/service/test_response_anvil.py index 44fbedac3..bf0e74662 100644 --- a/test/service/test_response_anvil.py +++ b/test/service/test_response_anvil.py @@ -134,7 +134,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -155,12 +155,12 @@ def test_entity_indices(self): 'file_format': [ '.vcf.gz' ], - 'is_supplementary': [ - False - ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 } ] @@ -256,7 +256,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -277,12 +277,12 @@ def 
test_entity_indices(self): 'file_format': [ '.bam' ], - 'is_supplementary': [ - False - ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 } ] @@ -343,6 +343,16 @@ def test_entity_indices(self): 'total': 2, 'type': 'terms' }, + 'files.is_supplementary': { + 'terms': [ + { + 'term': 'false', + 'count': 2 + } + ], + 'total': 2, + 'type': 'terms' + }, 'files.data_modality': { 'terms': [ { @@ -406,7 +416,7 @@ def test_entity_indices(self): 'donors.organism_type': { 'terms': [ { - 'term': None, + 'term': 'redacted-ACw+6ecI', 'count': 2 } ], @@ -487,16 +497,6 @@ def test_entity_indices(self): 'total': 2, 'type': 'terms' }, - 'files.is_supplementary': { - 'terms': [ - { - 'count': 2, - 'term': 'false' - } - ], - 'total': 2, - 'type': 'terms' - }, 'files.reference_assembly': { 'terms': [ { @@ -592,7 +592,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -611,14 +611,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.bam' - ], - 'is_supplementary': [ - False + '.vcf.gz' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 }, { @@ -626,14 +626,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.vcf.gz' - ], - 'is_supplementary': [ - False + '.bam' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 } ] @@ -694,6 +694,16 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, + 'files.is_supplementary': { + 'terms': [ + { + 'term': 'false', + 'count': 1 + } + ], + 'total': 1, + 'type': 'terms' + }, 'files.data_modality': { 'terms': [ { @@ -757,7 +767,7 @@ def test_entity_indices(self): 'donors.organism_type': { 'terms': [ { - 'term': None, + 'term': 'redacted-ACw+6ecI', 'count': 1 } ], @@ -838,16 +848,6 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, - 'files.is_supplementary': { - 'terms': [ - { - 'count': 1, - 'term': 'false' - } - ], - 
'total': 1, - 'type': 'terms' - }, 'files.reference_assembly': { 'terms': [ { @@ -963,7 +963,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -982,14 +982,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.bam' - ], - 'is_supplementary': [ - False + '.vcf.gz' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 }, { @@ -997,14 +997,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.vcf.gz' - ], - 'is_supplementary': [ - False + '.bam' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 } ] @@ -1065,6 +1065,16 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, + 'files.is_supplementary': { + 'terms': [ + { + 'term': 'false', + 'count': 1 + } + ], + 'total': 1, + 'type': 'terms' + }, 'files.data_modality': { 'terms': [ { @@ -1128,7 +1138,7 @@ def test_entity_indices(self): 'donors.organism_type': { 'terms': [ { - 'term': None, + 'term': 'redacted-ACw+6ecI', 'count': 1 } ], @@ -1209,16 +1219,6 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, - 'files.is_supplementary': { - 'terms': [ - { - 'count': 1, - 'term': 'false' - } - ], - 'total': 1, - 'type': 'terms' - }, 'files.reference_assembly': { 'terms': [ { @@ -1234,7 +1234,7 @@ def test_entity_indices(self): 'donors': { 'hits': [ { - 'entryId': '6708dc44-5232-44ca-a050-5dee58637284', + 'entryId': 'bfd991f2-2797-4083-972a-da7c6d7f1b2e', 'sources': [ { 'sourceSpec': 'tdr:test_project:snapshot/snapshot:/2', @@ -1326,12 +1326,12 @@ def test_entity_indices(self): ], 'donors': [ { - 'document_id': '6708dc44-5232-44ca-a050-5dee58637284', + 'document_id': 'bfd991f2-2797-4083-972a-da7c6d7f1b2e', 'source_datarepo_row_ids': [ 'subject:c23887a0-20c1-44e4-a09e-1c5dfdc2d0ef' ], 'donor_id': '1e2bd7e5-f45e-a391-daea-7c060be76acd', - 'organism_type': None, + 'organism_type': 'redacted-ACw+6ecI', 'phenotypic_sex': 
'redacted-JfQ0b3xG', 'reported_ethnicity': [ 'redacted-NSkwDycK' @@ -1348,14 +1348,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.bam' - ], - 'is_supplementary': [ - False + '.vcf.gz' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 }, { @@ -1363,14 +1363,14 @@ def test_entity_indices(self): None ], 'file_format': [ - '.vcf.gz' - ], - 'is_supplementary': [ - False + '.bam' ], 'reference_assembly': [ None ], + 'is_supplementary': [ + False + ], 'count': 1 } ] @@ -1431,6 +1431,16 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, + 'files.is_supplementary': { + 'terms': [ + { + 'term': 'false', + 'count': 1 + } + ], + 'total': 1, + 'type': 'terms' + }, 'files.data_modality': { 'terms': [ { @@ -1494,7 +1504,7 @@ def test_entity_indices(self): 'donors.organism_type': { 'terms': [ { - 'term': None, + 'term': 'redacted-ACw+6ecI', 'count': 1 } ], @@ -1575,16 +1585,6 @@ def test_entity_indices(self): 'total': 1, 'type': 'terms' }, - 'files.is_supplementary': { - 'terms': [ - { - 'count': 1, - 'term': 'false' - } - ], - 'total': 1, - 'type': 'terms' - }, 'files.reference_assembly': { 'terms': [ { @@ -1600,7 +1600,7 @@ def test_entity_indices(self): 'files': { 'hits': [ { - 'entryId': '6a85e0ab-2386-4f7e-8503-d72d90b4bc47', + 'entryId': '15b76f9c-6b46-433f-851d-34e89f1b9ba6', 'sources': [ { 'sourceSpec': 'tdr:test_project:snapshot/snapshot:/2', @@ -1681,7 +1681,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -1696,7 +1696,7 @@ def test_entity_indices(self): ], 'files': [ { - 'document_id': '6a85e0ab-2386-4f7e-8503-d72d90b4bc47', + 'document_id': '15b76f9c-6b46-433f-851d-34e89f1b9ba6', 'source_datarepo_row_ids': [ 'file_inventory:81d16471-97ac-48fe-99a0-73d9ec62c2c0' ], @@ -1711,26 +1711,26 @@ def test_entity_indices(self): None ], 'file_name': '307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz', + 
'is_supplementary': False, 'version': '2022-06-01T00:00:00.000000Z', - 'uuid': '6a85e0ab-2386-4f7e-8503-d72d90b4bc47', + 'uuid': '15b76f9c-6b46-433f-851d-34e89f1b9ba6', 'size': 213021639, - 'is_supplementary': False, - 'name': 'file_1e269f04-4347-4188-b060-1dcc69e71d67', + 'name': '307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz', 'crc32': '', 'sha256': '', 'accessible': True, 'drs_uri': str(self.drs_uri.add( - path='v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_1e269f04-4347-4188-b060-1dcc69e71d67' + path='v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_1e269f04-4347-4188-b060-1dcc69e71d67' )), 'url': str(self.base_url.set( - path='/repository/files/6a85e0ab-2386-4f7e-8503-d72d90b4bc47', - args=dict(catalog=self.catalog, version='2022-06-01T00:00:00.000000Z') + path='/repository/files/15b76f9c-6b46-433f-851d-34e89f1b9ba6', + args=dict(catalog='test', version='2022-06-01T00:00:00.000000Z') )) } ] }, { - 'entryId': '677f207e-2d12-4eca-8f7a-039325af91ad', + 'entryId': '3b17377b-16b1-431c-9967-e5d01fc5923f', 'sources': [ { 'sourceSpec': 'tdr:test_project:snapshot/snapshot:/2', @@ -1811,7 +1811,7 @@ def test_entity_indices(self): 'donors': [ { 'organism_type': [ - None + 'redacted-ACw+6ecI' ], 'phenotypic_sex': [ 'redacted-JfQ0b3xG' @@ -1826,7 +1826,7 @@ def test_entity_indices(self): ], 'files': [ { - 'document_id': '677f207e-2d12-4eca-8f7a-039325af91ad', + 'document_id': '3b17377b-16b1-431c-9967-e5d01fc5923f', 'source_datarepo_row_ids': [ 'file_inventory:9658d94a-511d-4b49-82c3-d0cb07e0cff2' ], @@ -1841,20 +1841,20 @@ def test_entity_indices(self): None ], 'file_name': '307500.merged.matefixed.sorted.markeddups.recal.bam', + 'is_supplementary': False, 'version': '2022-06-01T00:00:00.000000Z', - 'uuid': '677f207e-2d12-4eca-8f7a-039325af91ad', + 'uuid': '3b17377b-16b1-431c-9967-e5d01fc5923f', 'size': 3306845592, - 'is_supplementary': False, - 'name': 'file_8b722e88-8103-49c1-b351-e64fa7c6ab37', + 'name': '307500.merged.matefixed.sorted.markeddups.recal.bam', 'crc32': '', 
'sha256': '', 'accessible': True, 'drs_uri': str(self.drs_uri.add( - path='v1_5b77f311-6e67-4a1b-b32a-baa9f5742442_8b722e88-8103-49c1-b351-e64fa7c6ab37' + path='v1_2ae00e5c-4aef-4a1e-9eca-d8d0747b5348_8b722e88-8103-49c1-b351-e64fa7c6ab37' )), 'url': str(self.base_url.set( - path='/repository/files/677f207e-2d12-4eca-8f7a-039325af91ad', - args=dict(catalog=self.catalog, version='2022-06-01T00:00:00.000000Z') + path='/repository/files/3b17377b-16b1-431c-9967-e5d01fc5923f', + args=dict(catalog='test', version='2022-06-01T00:00:00.000000Z') )) } ] @@ -1915,6 +1915,16 @@ def test_entity_indices(self): 'total': 2, 'type': 'terms' }, + 'files.is_supplementary': { + 'terms': [ + { + 'term': 'false', + 'count': 2 + } + ], + 'total': 2, + 'type': 'terms' + }, 'files.data_modality': { 'terms': [ { @@ -1978,7 +1988,7 @@ def test_entity_indices(self): 'donors.organism_type': { 'terms': [ { - 'term': None, + 'term': 'redacted-ACw+6ecI', 'count': 2 } ], @@ -2059,16 +2069,6 @@ def test_entity_indices(self): 'total': 2, 'type': 'terms' }, - 'files.is_supplementary': { - 'terms': [ - { - 'count': 2, - 'term': 'false' - } - ], - 'total': 2, - 'type': 'terms' - }, 'files.reference_assembly': { 'terms': [ { @@ -2118,7 +2118,7 @@ def test_summary(self): 'donorSpecies': [ { 'count': 1, - 'species': None + 'species': 'redacted-ACw+6ecI' } ], 'fileCount': 2,