diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index a57b85a65b..34f5e5d4d6 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -35,6 +35,9 @@ from azul import ( JSON, ) +from azul.collections import ( + deep_dict_merge, +) from azul.indexer import ( BundleFQID, BundlePartition, @@ -428,7 +431,21 @@ def reconcile_inner_entities(cls, ) -> tuple[JSON, BundleFQID]: this_entity, this_bundle = this that_entity, that_bundle = that - return that if that_bundle.version > this_bundle.version else this + if this_entity.keys() == that_entity.keys(): + return that if that_bundle.version > this_bundle.version else this + else: + assert entity_type == 'datasets', (entity_type, this, that) + assert this_bundle.version == that_bundle.version, (this, that) + expected_keys = cls.field_types()[entity_type].keys() + assert this_entity.keys() < expected_keys, this + assert that_entity.keys() < expected_keys, that + merged = deep_dict_merge((this_entity, that_entity)) + # Confirm that we combined a regular dataset with a DUOS stub to + # produce the complete set of expected fields + assert merged.keys() == expected_keys, (this, that) + # We can safely discard that_bundle because only the version is + # used by the caller, and we know the versions are equal. + return merged, this_bundle class ActivityTransformer(BaseTransformer):