Skip to content

Commit

Permalink
propagate suppression status through models using "suppression tree"
Browse files Browse the repository at this point in the history
  • Loading branch information
SandyRogers committed Sep 20, 2023
1 parent f9dfe0c commit f9e6b78
Show file tree
Hide file tree
Showing 12 changed files with 400 additions and 74 deletions.
2 changes: 1 addition & 1 deletion emgapi/management/commands/clean_empty_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ def handle(self, *args, **kwargs):
run.ena_study_accession = study.secondary_accession
run.study = None
Run.objects.bulk_update(runs, ["ena_study_accession", "study"])
study.suppress()
study.suppress(propagate=False)
logger.info(f"{study} suppressed")
44 changes: 44 additions & 0 deletions emgapi/migrations/0011_auto_20230914_0716.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Generated by Django 3.2.18 on 2023-09-14 07:16

from django.db import migrations, models
import django.db.models.deletion


# Choices applied to every ``suppression_reason`` column altered by this migration.
_SUPPRESSION_REASON_CHOICES = [
    (1, 'Draft'),
    (3, 'Cancelled'),
    (5, 'Suppressed'),
    (6, 'Killed'),
    (7, 'Temporary Suppressed'),
    (8, 'Temporary Killed'),
    (100, 'Ancestor Suppressed'),
]


def _alter_suppression_reason(model_name):
    """Return the AlterField operation for ``<model_name>.suppression_reason``."""
    return migrations.AlterField(
        model_name=model_name,
        name='suppression_reason',
        field=models.IntegerField(
            blank=True,
            # A fresh list per field so no two field definitions share one object.
            choices=list(_SUPPRESSION_REASON_CHOICES),
            db_column='SUPPRESSION_REASON',
            null=True,
        ),
    )


class Migration(migrations.Migration):
    """
    Alter ``suppression_reason`` on the suppressible models (adding the
    'Ancestor Suppressed' choice to the list) and alter ``Assembly.study``
    to a nullable foreign key that is set to NULL on delete.
    """

    dependencies = [
        ('emgapi', '0010_runextraannotation'),
    ]

    # Operation order matches the originally generated migration.
    operations = [
        _alter_suppression_reason('analysisjob'),
        migrations.AlterField(
            model_name='assembly',
            name='study',
            field=models.ForeignKey(
                blank=True,
                db_column='STUDY_ID',
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name='assemblies',
                to='emgapi.study',
            ),
        ),
        _alter_suppression_reason('assembly'),
        _alter_suppression_reason('run'),
        _alter_suppression_reason('sample'),
        _alter_suppression_reason('study'),
    ]
137 changes: 95 additions & 42 deletions emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import logging

from django.apps import apps
from django.conf import settings
from django.db import models
from django.db.models import (CharField, Count, OuterRef, Prefetch, Q,
Expand Down Expand Up @@ -73,7 +74,7 @@ class SuppressibleModel(models.Model):
suppressible_descendants = []
# List of related_names from this model that should have their suppression status propagated from this.
# E.g. Study.suppressible_descendants = ['samples'] to suppress a study's samples if the study is suppressed.

class Reason(models.IntegerChoices):
DRAFT = 1
CANCELLED = 3
Expand All @@ -88,64 +89,114 @@ class Reason(models.IntegerChoices):
suppressed_at = models.DateTimeField(db_column='SUPPRESSED_AT', blank=True, null=True)
suppression_reason = models.IntegerField(db_column='SUPPRESSION_REASON', blank=True, null=True, choices=Reason.choices)

def _get_suppression_descendant_tree(self, suppressing: bool = True):
    """
    Recursively collect the suppressible descendants of the calling suppressible model.

    Every related_name listed in ``self.suppressible_descendants`` is followed, and each
    selected descendant is asked for its own descendants in turn, so the result covers
    the whole tree below this instance.

    :param suppressing: True to collect descendants that should be suppressed
        (i.e. are not currently suppressed). False to collect descendants that should be
        unsuppressed: those currently suppressed with reason ``ANCESTOR_SUPPRESSED``.
        Descendants suppressed for any other reason are left alone, so that unsuppressing
        an ancestor does not revert a suppression that was applied to the descendant itself.
    :return: Dict mapping model class names (``_meta.object_name``) to sets of model instances.
    """
    suppressibles = {}

    def _add_to_suppressibles(kls, additional_suppressibles):
        # Merge instances into the set kept for model name `kls`, creating it on first use.
        suppressibles.setdefault(kls, set()).update(additional_suppressibles)

    logger.debug(f'Building suppression descendant tree for {self._meta.object_name}')

    for descendant_relation_name in self.suppressible_descendants:
        descendant_relation = getattr(self, descendant_relation_name)

        if suppressing:
            descendants_to_update = descendant_relation.filter(is_suppressed=False)
        else:
            # Only undo suppressions that were propagated from an ancestor.
            descendants_to_update = descendant_relation.filter(
                is_suppressed=True,
                suppression_reason=self.Reason.ANCESTOR_SUPPRESSED,
            )

        if not descendants_to_update.exists():
            logger.debug(f'No {descendant_relation_name} descendants to handle.')
            continue

        # Check whether the descendant might have other non-suppressed ancestors of the same type as this
        # (If so, it shouldn't be suppressed).
        relation_field = self._meta.get_field(descendant_relation_name)
        logger.debug(f'{relation_field = }')
        if isinstance(relation_field, models.ManyToManyField):
            logger.debug(
                f"Descendant relation {descendant_relation_name} on {self.__class__} is a Many2Many. "
                f"Checking whether descendants have unsuppressed siblings.."
            )
            logger.debug(f"Before filtering, had {descendants_to_update.count()} {descendant_relation_name}")

            # Rows of the through table linking any descendant of this relation to an
            # unsuppressed ancestor other than self.
            through_rows = descendant_relation.through.objects.filter(
                **{
                    f"{descendant_relation.target_field_name}__in": descendant_relation.all(),  # e.g. sample in study.samples
                    f"{descendant_relation.source_field_name}__is_suppressed": False,  # e.g. not study.is_suppressed
                }
            ).exclude(
                **{
                    f"{descendant_relation.source_field_name}": self,  # e.g. study != self
                }
            )
            descendant_ids_with_unsuppressed_alike_ancestors = through_rows.values_list(
                f"{descendant_relation.target_field_name}_id",
                flat=True
            )
            descendants_to_update = descendants_to_update.exclude(
                pk__in=descendant_ids_with_unsuppressed_alike_ancestors
            )

            logger.debug(f"After filtering, had {descendants_to_update.count()} {descendant_relation_name}")

        _add_to_suppressibles(
            descendant_relation.model._meta.object_name,
            descendants_to_update
        )

        # Recurse: fold every descendant's own subtree into the result.
        for descendant in descendants_to_update:
            subtree = descendant._get_suppression_descendant_tree(suppressing)
            for kls, kls_suppressibles in subtree.items():
                _add_to_suppressibles(kls, kls_suppressibles)

    return suppressibles

def suppress(self, suppression_reason=None, save=True, propagate=True):
    """
    Mark this instance as suppressed and optionally propagate to its descendants.

    :param suppression_reason: Value stored in ``suppression_reason`` on this instance.
    :param save: If True, save this instance after updating its suppression fields.
    :param propagate: If True, every instance returned by
        ``_get_suppression_descendant_tree()`` is marked suppressed with reason
        ``ANCESTOR_SUPPRESSED`` and written with one ``bulk_update`` per model.
        Descendants are written regardless of ``save``.
    :return: self
    """
    self.is_suppressed = True
    self.suppressed_at = timezone.now()
    self.suppression_reason = suppression_reason
    if save:
        self.save()
    if propagate:
        descendant_tree = self._get_suppression_descendant_tree()
        for descendants_object_type, descendants in descendant_tree.items():
            for descendant in descendants:
                descendant.is_suppressed = True
                descendant.suppression_reason = self.Reason.ANCESTOR_SUPPRESSED
            # The tree is keyed by model class name; resolve it back to the model class.
            descendant_model = apps.get_model(app_label='emgapi', model_name=descendants_object_type)
            descendant_model.objects.bulk_update(descendants, fields=['is_suppressed', 'suppression_reason'])
            logger.info(
                f'Propagated suppression of {self} to {len(descendants)} descendant {descendants_object_type}s'
            )
    return self

def unsuppress(self, save=True, propagate=True):
    """
    Clear the suppression state of this instance and optionally propagate to its descendants.

    :param save: If True, save this instance after clearing its suppression fields.
    :param propagate: If True, every instance returned by
        ``_get_suppression_descendant_tree(suppressing=False)`` has ``is_suppressed``
        and ``suppression_reason`` cleared and is written with one ``bulk_update``
        per model. Descendants are written regardless of ``save``.
    :return: self
    """
    self.is_suppressed = False
    self.suppressed_at = None
    self.suppression_reason = None
    if save:
        self.save()
    if propagate:
        descendant_tree = self._get_suppression_descendant_tree(suppressing=False)
        for descendants_object_type, descendants in descendant_tree.items():
            for descendant in descendants:
                descendant.is_suppressed = False
                descendant.suppression_reason = None
            # The tree is keyed by model class name; resolve it back to the model class.
            descendant_model = apps.get_model(app_label='emgapi', model_name=descendants_object_type)
            descendant_model.objects.bulk_update(descendants, fields=['is_suppressed', 'suppression_reason'])
            logger.info(
                f'Propagated unsuppression of {self} to {len(descendants)} descendant {descendants_object_type}s'
            )
    return self

Expand All @@ -154,9 +205,11 @@ class Meta:


class ENASyncableModel(SuppressibleModel, PrivacyControlledModel):
def sync_with_ena_status(self, ena_model_status: ENAStatus):
def sync_with_ena_status(self, ena_model_status: ENAStatus, propagate=True):
"""Sync the model with the ENA status accordingly.
Fields that are updated: is_suppressed, suppressed_at, reason and is_private
:propagate: If True, propagate the ena status of this entity to entities that are derived from / children of it.
"""
if ena_model_status == ENAStatus.PRIVATE and not self.is_private:
self.is_private = True
Expand Down Expand Up @@ -197,7 +250,7 @@ def sync_with_ena_status(self, ena_model_status: ENAStatus):
elif ena_model_status == ENAStatus.CANCELLED:
reason = SuppressibleModel.Reason.CANCELLED

self.suppress(suppression_reason=reason, save=False)
self.suppress(suppression_reason=reason, save=False, propagate=propagate)

logging.info(
f"{self} was suppressed, status on ENA {ena_model_status}"
Expand Down Expand Up @@ -1115,7 +1168,7 @@ class Meta:


class SampleQuerySet(BaseQuerySet, SuppressQuerySet):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down Expand Up @@ -1334,7 +1387,7 @@ def __str__(self):


class RunQuerySet(BaseQuerySet, SuppressQuerySet):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down Expand Up @@ -1757,7 +1810,7 @@ class VariableNames(models.Model):
class Meta:
db_table = 'VARIABLE_NAMES'
unique_together = (('var_id', 'var_name'), ('var_id', 'var_name'),)
verbose_name = 'variable name'
verbose_name = 'variable name'

def __str__(self):
return self.var_name
Expand Down
6 changes: 1 addition & 5 deletions emgena/management/commands/sync_assemblies_with_ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,7 @@ def handle(self, *args, **kwargs):
)
continue

# inherits the status of its study
if study.is_suppressed:
emg_assembly.suppress(
suppression_reason=study.suppression_reason
)
# inherits the privacy status of its study
emg_assembly.is_private = study.is_private
continue
elif ena_assembly.status_id is None:
Expand Down
5 changes: 1 addition & 4 deletions emgena/management/commands/sync_runs_with_ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@
# limitations under the License.

import logging
import os

from django.db.models import Count
from django.core.management import BaseCommand
from django.conf import settings

from emgapi import models as emg_models
from emgena import models as ena_models
Expand All @@ -38,7 +35,7 @@ def handle(self, *args, **kwargs):
logging.info(f"Total Runs on EMG {runs_count}")

while offset < runs_count:
emg_runs_batch = emg_models.Run.objects.all()[offset : offset + batch_size]
emg_runs_batch = emg_models.Run.objects.all()[offset: offset + batch_size]
ena_runs_batch = ena_models.Run.objects.using("era").filter(
run_id__in=[run.accession for run in emg_runs_batch]
)
Expand Down
3 changes: 0 additions & 3 deletions emgena/management/commands/sync_samples_with_ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@
# limitations under the License.

import logging
import os

from django.db.models import Count
from django.core.management import BaseCommand
from django.conf import settings

from emgapi import models as emg_models
from emgena import models as ena_models
Expand Down
3 changes: 0 additions & 3 deletions emgena/management/commands/sync_studies_with_ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@
# limitations under the License.

import logging
import os

from django.db.models import Count
from django.core.management import BaseCommand
from django.conf import settings

from emgapi import models as emg_models
from emgena import models as ena_models
Expand Down
20 changes: 19 additions & 1 deletion tests/ena/test_sync_assemblies_with_ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from django.urls import reverse
from django.core.management import call_command
from emgapi.models import Assembly
from emgapi.models import Assembly, AnalysisJob

from test_utils.emg_fixtures import * # noqa

Expand Down Expand Up @@ -117,3 +117,21 @@ def test_sync_assemblies_based_on_study(
assembly.refresh_from_db()
assert assembly.is_suppressed == False
assert assembly.is_private == False


@patch("emgena.models.Assembly.objects")
def test_sync_assemblies_propagation(
    self, ena_assembly_objs_mock, ena_suppression_propagation_assemblies
):
    """
    Running ``sync_assemblies_with_ena`` against the propagation fixture must suppress
    the assemblies and mark their analysis jobs as suppressed via an ancestor.
    """

    def suppressed_count(model, **extra_filters):
        # Issue a fresh COUNT each time so results reflect the current database state.
        return model.objects.filter(is_suppressed=True, **extra_filters).count()

    ena_queryset_mock = ena_assembly_objs_mock.using("ena")
    ena_queryset_mock.filter.return_value = ena_suppression_propagation_assemblies

    # Nothing is suppressed before the sync runs.
    assert suppressed_count(Assembly) == 0
    assert suppressed_count(AnalysisJob) == 0

    call_command("sync_assemblies_with_ena")

    assert suppressed_count(Assembly) == 32
    assert suppressed_count(
        AnalysisJob,
        suppression_reason=AnalysisJob.Reason.ANCESTOR_SUPPRESSED,
    ) == 64
Loading

0 comments on commit f9e6b78

Please sign in to comment.