Skip to content

Commit

Permalink
Consolidate AnVIL and HCA boilerplate (partial #5358)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Sep 19, 2023
1 parent 0d02b20 commit c24989b
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 35 deletions.
23 changes: 23 additions & 0 deletions src/azul/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,31 @@
Optional,
)

import attr

from azul.indexer import (
Bundle,
BundlePartition,
)
from azul.indexer.aggregate import (
EntityAggregator,
)
from azul.indexer.document import (
Contribution,
ContributionCoordinates,
EntityReference,
EntityType,
FieldTypes,
)
from azul.types import (
MutableJSON,
)


@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class Transformer(metaclass=ABCMeta):
bundle: Bundle
deleted: bool

@classmethod
@abstractmethod
Expand Down Expand Up @@ -75,3 +86,15 @@ def get_aggregator(cls, entity_type: EntityType) -> Optional[EntityAggregator]:
entities of types other than X.
"""
raise NotImplementedError

def _contribution(self,
                  contents: MutableJSON,
                  entity: EntityReference
                  ) -> Contribution:
    """
    Wrap the given contents in a contribution to the given entity, made by
    the bundle this transformer operates on.

    :param contents: the document contents to be contributed

    :param entity: a reference to the entity the contribution is for

    :return: a contribution without a version, whose coordinates combine the
             given entity, this transformer's bundle and this transformer's
             deletion flag, and whose source is that of the bundle
    """
    bundle_fqid = self.bundle.fqid
    return Contribution(
        coordinates=ContributionCoordinates(entity=entity,
                                            bundle=bundle_fqid.upcast(),
                                            deleted=self.deleted),
        version=None,
        source=bundle_fqid.source,
        contents=contents
    )
22 changes: 7 additions & 15 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
)
from azul.indexer.document import (
Contribution,
ContributionCoordinates,
EntityReference,
EntityType,
FieldTypes,
Expand Down Expand Up @@ -120,7 +119,6 @@ def _search(cls,
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class BaseTransformer(Transformer, metaclass=ABCMeta):
bundle: AnvilBundle
deleted: bool

@classmethod
def field_types(cls) -> FieldTypes:
Expand Down Expand Up @@ -307,8 +305,8 @@ def get_bound(field_name: str) -> Optional[float]:
}

def _contribution(self,
entity: EntityReference,
contents: MutableJSON,
entity: EntityReference,
) -> Contribution:
# The entity type is used to determine the index name.
# All activities go into the same index, regardless of their polymorphic type.
Expand All @@ -317,13 +315,7 @@ def _contribution(self,
if entity.entity_type.endswith('activity') else
entity.entity_type)
entity = attr.evolve(entity, entity_type=entity_type)
coordinates = ContributionCoordinates(entity=entity,
bundle=self.bundle.fqid.upcast(),
deleted=self.deleted)
return Contribution(coordinates=coordinates,
version=None,
source=self.bundle.fqid.source,
contents=contents)
return super()._contribution(contents, entity)

def _entity(self,
entity: EntityReference,
Expand Down Expand Up @@ -422,7 +414,7 @@ def _transform(self, entity: EntityReference) -> Contribution:
donors=self._entities(self._donor, linked['donor']),
files=self._entities(self._file, linked['file']),
)
return self._contribution(entity, contents)
return self._contribution(contents, entity)


class BiosampleTransformer(BaseTransformer):
Expand All @@ -444,7 +436,7 @@ def _transform(self, entity: EntityReference) -> Contribution:
donors=self._entities(self._donor, linked['donor']),
files=self._entities(self._file, linked['file']),
)
return self._contribution(entity, contents)
return self._contribution(contents, entity)


class DatasetTransformer(BaseTransformer):
Expand All @@ -465,7 +457,7 @@ def _transform(self, entity: EntityReference) -> Contribution:
donors=self._entities(self._donor, self._entities_by_type['donor']),
files=self._entities(self._file, self._entities_by_type['file']),
)
return self._contribution(entity, contents)
return self._contribution(contents, entity)


class DonorTransformer(BaseTransformer):
Expand All @@ -487,7 +479,7 @@ def _transform(self, entity: EntityReference) -> Contribution:
donors=[self._donor(entity)],
files=self._entities(self._file, linked['file']),
)
return self._contribution(entity, contents)
return self._contribution(contents, entity)


class FileTransformer(BaseTransformer):
Expand All @@ -509,4 +501,4 @@ def _transform(self, entity: EntityReference) -> Contribution:
donors=self._entities(self._donor, linked['donor']),
files=[self._file(entity)],
)
return self._contribution(entity, contents)
return self._contribution(contents, entity)
26 changes: 6 additions & 20 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
from azul.indexer.document import (
ClosedRange,
Contribution,
ContributionCoordinates,
EntityReference,
EntityType,
FieldType,
Expand Down Expand Up @@ -455,7 +454,6 @@ class DatedEntity(Entity, Protocol):
class BaseTransformer(Transformer, metaclass=ABCMeta):
bundle: HCABundle
api_bundle: api.Bundle
deleted: bool

# This stub is only needed to aid PyCharm's type inference. Without this,
# a constructor invocation that doesn't refer to the class explicitly, but
Expand Down Expand Up @@ -1206,19 +1204,8 @@ def _build_strata_string(self, file):
point_strings.append(dimension + '=' + ','.join(sorted(values)))
return ';'.join(point_strings)

def _contribution(self,
                  contents: MutableJSON,
                  entity_id: api.UUID4
                  ) -> Contribution:
    """
    Wrap the given contents in a contribution to the entity with the given
    ID, made by the bundle this transformer operates on.

    :param contents: the document contents to be contributed

    :param entity_id: the ID of the entity the contribution is for. It is
                      converted to a string and paired with this
                      transformer's entity type to form the entity reference.

    :return: a contribution without a version, whose coordinates combine the
             entity reference, this transformer's bundle and this
             transformer's deletion flag, and whose source is that of the
             bundle
    """
    entity_ref = EntityReference(entity_type=self.entity_type(),
                                 entity_id=str(entity_id))
    bundle_fqid = self.bundle.fqid
    return Contribution(
        coordinates=ContributionCoordinates(entity=entity_ref,
                                            bundle=bundle_fqid.upcast(),
                                            deleted=self.deleted),
        version=None,
        source=bundle_fqid.source,
        contents=contents
    )
def _entity_ref(self, entity_id: api.UUID4) -> EntityReference:
    """
    Return a reference to the entity with the given ID, using this
    transformer's entity type as the type of the referenced entity.

    :param entity_id: the ID of the entity; it is converted to a string
    """
    id_string = str(entity_id)
    type_name = self.entity_type()
    return EntityReference(entity_id=id_string, entity_type=type_name)

@classmethod
def field_types(cls) -> FieldTypes:
Expand Down Expand Up @@ -1436,7 +1423,7 @@ def _transform(self, files: Iterable[api.File]) -> Iterable[Contribution]:
additional_contents = self.matrix_stratification_values(file)
for entity_type, values in additional_contents.items():
contents[entity_type].extend(values)
yield self._contribution(contents, file.document_id)
yield self._contribution(contents, self._entity_ref(file.document_id))

def matrix_stratification_values(self, file: api.File) -> JSON:
"""
Expand Down Expand Up @@ -1531,7 +1518,7 @@ def _transform(self,
),
dates=[self._date(cell_suspension)],
projects=[self._project(self._api_project)])
yield self._contribution(contents, cell_suspension.document_id)
yield self._contribution(contents, self._entity_ref(cell_suspension.document_id))


class SampleTransformer(PartitionedTransformer):
Expand Down Expand Up @@ -1578,7 +1565,7 @@ def _transform(self, samples: Iterable[Sample]) -> Iterable[Contribution]:
),
dates=[self._date(sample)],
projects=[self._project(self._api_project)])
yield self._contribution(contents, sample.document_id)
yield self._contribution(contents, self._entity_ref(sample.document_id))


class BundleAsEntity(DatedEntity):
Expand Down Expand Up @@ -1678,8 +1665,7 @@ def _transform(self) -> Contribution:
contributed_analyses=contributed_analyses,
dates=[self._date(self._singleton_entity())],
projects=[self._project(self._api_project)])

return self._contribution(contents, self._singleton_id)
return self._contribution(contents, self._entity_ref(self._singleton_id))


class ProjectTransformer(SingletonTransformer):
Expand Down

0 comments on commit c24989b

Please sign in to comment.