Skip to content

Commit

Permalink
Push down HCA specifics to a Bundle subclass (#4940)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Apr 15, 2023
1 parent debe0a8 commit c8962cd
Show file tree
Hide file tree
Showing 12 changed files with 279 additions and 275 deletions.
2 changes: 1 addition & 1 deletion src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ class Catalog:
Config.Catalog(name='dcp',
atlas='',
internal=False,
plugins={'metadata': {'name': 'hca'},
plugins={'metadata': {'name': 'hca'},
'repository': {'name': 'tdr_hca'}},
sources='')
Expand Down
22 changes: 4 additions & 18 deletions src/azul/indexer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from azul.types import (
JSON,
MutableJSON,
MutableJSONs,
SupportsLessThan,
get_generic_type_params,
)
Expand Down Expand Up @@ -396,23 +395,6 @@ def to_json(self) -> SourcedBundleFQIDJSON:
@attr.s(auto_attribs=True, kw_only=True)
class Bundle(ABC, Generic[BUNDLE_FQID]):
fqid: BUNDLE_FQID
manifest: MutableJSONs
"""
Each item of the `manifest` attribute's value has this shape:
{
'content-type': 'application/json; dcp-type="metadata/biomaterial"',
'crc32c': 'fd239631',
'indexed': True,
'name': 'cell_suspension_0.json',
's3_etag': 'aa31c093cc816edb1f3a42e577872ec6',
'sha1': 'f413a9a7923dee616309e4f40752859195798a5d',
'sha256': 'ea4c9ed9e53a3aa2ca4b7dffcacb6bbe9108a460e8e15d2b3d5e8e5261fb043e',
'size': 1366,
'uuid': '0136ebb4-1317-42a0-8826-502fae25c29f',
'version': '2019-05-16T162155.020000Z'
}
"""
metadata_files: MutableJSON

@property
def uuid(self) -> BundleUUID:
Expand All @@ -432,6 +414,10 @@ def drs_path(self, manifest_entry: JSON) -> Optional[str]:
"""
raise NotImplementedError

@abstractmethod
def to_json(self) -> MutableJSON:
raise NotImplementedError


class BundlePartition(UUIDPartition['BundlePartition']):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def load(cls: Type[T], catalog: CatalogName) -> Type[T]:

@classmethod
def types(cls) -> Sequence[Type['Plugin']]:
return cls.__subclasses__()
return RepositoryPlugin, MetadataPlugin

@classmethod
def type_for_name(cls, plugin_type_name: str) -> Type[T]:
Expand Down
86 changes: 85 additions & 1 deletion src/azul/plugins/anvil.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,97 @@
from abc import (
ABC,
)
from typing import (
AbstractSet,
Generic,
Iterable,
Optional,
TypeVar,
Union,
)

import attr
from more_itertools import (
one,
)

from azul.indexer import (
Bundle,
)
from azul.indexer.document import (
EntityReference,
EntityType,
)
from azul.plugins import (
Plugin,
)
from azul.types import (
MutableJSON,
)

# AnVIL snapshots do not use UUIDs for primary/foreign keys. This type alias
# helps us distinguish these keys from the document UUIDs, which are drawn
# from the `datarepo_row_id` column. Note that entities from different tables
# may have the same key, so `KeyReference` should be used when mixing keys
# from different entity types.
Key = str


@attr.s(auto_attribs=True, frozen=True, kw_only=True, slots=True)
class KeyReference:
    """
    A key paired with the type of the entity it belongs to. Keys alone are
    not unique across entity types, so this pairing is what should be used
    whenever keys from different entity types are mixed.
    """
    key: Key
    entity_type: EntityType


# Type variable for the kind of reference held by a `Link`: either an
# `EntityReference` or a `KeyReference`.
ENTITY_REF = TypeVar(name='ENTITY_REF', bound=Union[EntityReference, KeyReference])


@attr.s(auto_attribs=True, frozen=True, kw_only=True, order=False)
class Link(Generic[ENTITY_REF]):
    """
    An immutable association between a set of input references, an optional
    activity reference and a set of output references.
    """
    inputs: AbstractSet[ENTITY_REF] = attr.ib(factory=frozenset)
    activity: Optional[ENTITY_REF] = attr.ib(default=None)
    outputs: AbstractSet[ENTITY_REF] = attr.ib(factory=frozenset)

    @property
    def all_entities(self) -> AbstractSet[ENTITY_REF]:
        """
        The union of the inputs, the outputs and, if there is one, the
        activity of this link.
        """
        return self.inputs | self.outputs | (set() if self.activity is None else {self.activity})

    def to_json(self) -> MutableJSON:
        """
        Return a JSON representation of this link in which every reference
        is converted to a string and the inputs and outputs are sorted.
        """
        # NOTE(review): when `activity` is None, `str()` renders it as the
        # string 'None' rather than a JSON null. Left unchanged here because
        # consumers of this output may depend on it -- confirm.
        return {
            'inputs': sorted(map(str, self.inputs)),
            'activity': str(self.activity),
            'outputs': sorted(map(str, self.outputs))
        }

    @classmethod
    def merge(cls, links: Iterable['Link']) -> 'Link':
        """
        Combine the given links into a single link whose inputs and outputs
        are the unions of the inputs and outputs of the given links.

        :param links: the links to merge. Any iterable is accepted,
                      including one-shot iterables such as generators. All
                      links must agree on their activity.

        :raises ValueError: raised by `one` if the links do not share exactly
                            one activity value, which includes the case of
                            `links` being empty
        """
        # The links are traversed three times below, so they are materialized
        # first. Otherwise, a generator would be exhausted by the first
        # traversal, leaving nothing for the other two.
        links = list(links)
        # Starting from an empty frozenset, instead of calling the unbound
        # `frozenset.union`, accepts any mix of `set` and `frozenset` operands
        # and always yields a frozenset.
        return cls(inputs=frozenset().union(*(link.inputs for link in links)),
                   activity=one({link.activity for link in links}),
                   outputs=frozenset().union(*(link.outputs for link in links)))

    def __lt__(self, other: 'Link') -> bool:
        """
        Order links by their activity. If either link lacks an activity,
        neither link is considered less than the other.
        """
        # NOTE(review): because of the None case this is not a total order,
        # so the relative position of links without an activity in a sorted
        # sequence depends on the order of the input -- confirm that's OK.
        if self.activity is None or other.activity is None:
            return False
        else:
            return self.activity < other.activity


@attr.s(auto_attribs=True, kw_only=True)
class AnvilBundle(Bundle, ABC):
    """
    A bundle made up of entities, each identified by an entity reference,
    and of links between those references.
    """
    entities: dict[EntityReference, MutableJSON] = attr.ib(factory=dict)
    links: set[Link[EntityReference]] = attr.ib(factory=set)

    def to_json(self) -> MutableJSON:
        """
        Return a JSON representation of this bundle. The entities are keyed
        by the string form of their reference and inserted in sorted order,
        the links are serialized in sorted order.
        """
        entities_json = {}
        for entity_ref, entity in sorted(self.entities.items()):
            entities_json[str(entity_ref)] = entity
        links_json = []
        for link in sorted(self.links):
            links_json.append(link.to_json())
        return {'entities': entities_json, 'links': links_json}


class AnvilPlugin(Plugin, ABC):
class AnvilPlugin(Plugin[AnvilBundle], ABC):

@classmethod
def atlas(cls) -> str:
Expand Down
39 changes: 38 additions & 1 deletion src/azul/plugins/hca.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,49 @@
ABC,
)

import attr

from azul.indexer import (
BUNDLE_FQID,
Bundle,
)
from azul.plugins import (
Plugin,
)
from azul.types import (
MutableJSON,
MutableJSONs,
)


@attr.s(auto_attribs=True, kw_only=True)
class HCABundle(Bundle[BUNDLE_FQID], ABC):
    """
    A bundle consisting of a manifest and a set of metadata files.
    """
    manifest: MutableJSONs
    """
    Each item of the `manifest` attribute's value has this shape:
    {
        'content-type': 'application/json; dcp-type="metadata/biomaterial"',
        'crc32c': 'fd239631',
        'indexed': True,
        'name': 'cell_suspension_0.json',
        's3_etag': 'aa31c093cc816edb1f3a42e577872ec6',
        'sha1': 'f413a9a7923dee616309e4f40752859195798a5d',
        'sha256': 'ea4c9ed9e53a3aa2ca4b7dffcacb6bbe9108a460e8e15d2b3d5e8e5261fb043e',
        'size': 1366,
        'uuid': '0136ebb4-1317-42a0-8826-502fae25c29f',
        'version': '2019-05-16T162155.020000Z'
    }
    """
    metadata_files: MutableJSON

    def to_json(self) -> MutableJSON:
        """
        Return a JSON representation of this bundle, with the manifest under
        the 'manifest' key and the metadata files under the 'metadata' key.
        """
        manifest, metadata = self.manifest, self.metadata_files
        return {'manifest': manifest, 'metadata': metadata}


class HCAPlugin(Plugin, ABC):
class HCAPlugin(Plugin[HCABundle], ABC):

@classmethod
def atlas(cls) -> str:
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Sorting,
)
from azul.plugins.anvil import (
AnvilBundle,
AnvilPlugin,
)
from azul.plugins.metadata.anvil.indexer.transform import (
Expand Down Expand Up @@ -45,7 +46,7 @@
)


class Plugin(MetadataPlugin[Bundle], AnvilPlugin):
class Plugin(MetadataPlugin[AnvilBundle], AnvilPlugin):

@property
def exposed_indices(self) -> Mapping[str, Sorting]:
Expand Down
Loading

0 comments on commit c8962cd

Please sign in to comment.