diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index a57b85a65..10eb56f1c 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -35,6 +35,9 @@ from azul import ( JSON, ) +from azul.collections import ( + deep_dict_merge, +) from azul.indexer import ( BundleFQID, BundlePartition, @@ -428,7 +431,31 @@ def reconcile_inner_entities(cls, ) -> tuple[JSON, BundleFQID]: this_entity, this_bundle = this that_entity, that_bundle = that - return that if that_bundle.version > this_bundle.version else this + if this_bundle.version < that_bundle.version: + return that + else: + if this_entity.keys() == that_entity.keys(): + return this + else: + this_and_that = this, that + assert this_bundle.version == that_bundle.version, this_and_that + assert entity_type == 'datasets', entity_type + + # FIXME: Eliminate local import + # https://github.com/DataBiosphere/azul/issues/5683 + from azul.plugins.repository.tdr_anvil import ( + BundleEntityType, + TDRAnvilBundleFQID, + ) + + bundle_types = defaultdict(int) + for bundle in this_bundle, that_bundle: + assert isinstance(bundle, TDRAnvilBundleFQID), this_and_that + bundle_types[bundle.entity_type] += 1 + assert bundle_types[BundleEntityType.duos] == 1, this_and_that + # We can safely discard that_bundle because only the version is + # used by the caller, and we know the versions are equal. + return deep_dict_merge((this_entity, that_entity)), this_bundle class ActivityTransformer(BaseTransformer):