Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions ami/main/checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import logging
from dataclasses import dataclass, field

from django.db.models import Count

logger = logging.getLogger(__name__)


@dataclass
class OccurrenceCheckReport:
    """Findings from an occurrence integrity check, plus counts of applied fixes."""

    # PKs of occurrences that have classifications but no determination set.
    missing_determination: list[int] = field(default_factory=list)
    # PKs of occurrences with no detections at all.
    orphaned_occurrences: list[int] = field(default_factory=list)
    # PKs of detections not linked to any occurrence.
    orphaned_detections: list[int] = field(default_factory=list)
    # Repair counters; only non-zero when fixes were requested and applied.
    fixed_determinations: int = 0
    deleted_occurrences: int = 0

    @property
    def has_issues(self) -> bool:
        """True when any check found at least one problem."""
        return any((self.missing_determination, self.orphaned_occurrences, self.orphaned_detections))

    @property
    def summary(self) -> str:
        """One-line, human-readable summary of the findings (and fixes, if any)."""
        parts: list[str] = []
        if self.missing_determination:
            suffix = f" ({self.fixed_determinations} fixed)" if self.fixed_determinations else ""
            parts.append(f"{len(self.missing_determination)} missing determination{suffix}")
        if self.orphaned_occurrences:
            suffix = f" ({self.deleted_occurrences} deleted)" if self.deleted_occurrences else ""
            parts.append(f"{len(self.orphaned_occurrences)} orphaned occurrences{suffix}")
        if self.orphaned_detections:
            parts.append(f"{len(self.orphaned_detections)} orphaned detections")
        if not parts:
            return "No issues found"
        return ", ".join(parts)


def check_occurrences(
    project_id: int | None = None,
    fix: bool = False,
) -> OccurrenceCheckReport:
    """
    Check occurrence data integrity and optionally fix issues.

    Performs three checks:
      1. Occurrences with classifications but no determination set
         (fixable: recompute the determination).
      2. Occurrences with no detections at all (fixable: delete them).
      3. Detections not linked to any occurrence (report-only).

    Args:
        project_id: Scope to a single project. None = all projects.
        fix: If True, auto-fix what can be fixed. If False (default), report only.

    Returns:
        OccurrenceCheckReport with findings and fix counts.
    """
    from ami.main.models import Detection, Occurrence, update_occurrence_determination

    report = OccurrenceCheckReport()

    # Base querysets scoped by project
    occ_qs = Occurrence.objects.all()
    det_qs = Detection.objects.all()
    if project_id is not None:
        occ_qs = occ_qs.filter(project_id=project_id)
        # NOTE(review): elsewhere detections are typically scoped via
        # source_image__project_id; the deployment path is kept here and should
        # agree as long as SourceImage.project is backfilled from the
        # deployment — confirm before changing.
        det_qs = det_qs.filter(source_image__deployment__project_id=project_id)

    # Check 1: Missing determination
    # Occurrences with classifications but no determination set
    missing = occ_qs.filter(
        determination__isnull=True,
        detections__classifications__isnull=False,
    ).distinct()
    report.missing_determination = list(missing.values_list("pk", flat=True))

    if fix and report.missing_determination:
        for occ in missing.iterator():
            # One bad row must not abort the rest of the repair pass (or the
            # orphan checks below): log the failure and keep going.
            try:
                if update_occurrence_determination(occ, current_determination=None, save=True):
                    report.fixed_determinations += 1
            except Exception:
                logger.exception("Failed to fix missing determination for occurrence %s", occ.pk)
        logger.info(
            "Fixed %d/%d missing determinations",
            report.fixed_determinations,
            len(report.missing_determination),
        )

    # Check 2: Orphaned occurrences (no detections)
    orphaned_occ = occ_qs.annotate(det_count=Count("detections")).filter(det_count=0)
    report.orphaned_occurrences = list(orphaned_occ.values_list("pk", flat=True))

    if fix and report.orphaned_occurrences:
        # QuerySet.delete() returns the total across all cascaded models;
        # record only the Occurrence rows so the count is not inflated by
        # cascade-deleted related objects.
        _, per_model_counts = orphaned_occ.delete()
        deleted_occurrences = per_model_counts.get(Occurrence._meta.label, 0)
        report.deleted_occurrences = deleted_occurrences
        logger.info("Deleted %d orphaned occurrences", deleted_occurrences)

    # Check 3: Orphaned detections (no occurrence). Report-only: there is no
    # safe automatic fix for a detection with no occurrence.
    orphaned_det = det_qs.filter(occurrence__isnull=True)
    report.orphaned_detections = list(orphaned_det.values_list("pk", flat=True))

    if report.orphaned_detections:
        logger.warning(
            "Found %d orphaned detections (no occurrence linked): %s",
            len(report.orphaned_detections),
            report.orphaned_detections[:10],
        )

    return report
69 changes: 69 additions & 0 deletions ami/main/management/commands/check_occurrences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import logging

from django.core.management.base import BaseCommand

from ami.main.checks import check_occurrences

logger = logging.getLogger(__name__)


Comment on lines +1 to +9
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logging/logger are defined but never used in this command. Please remove the unused import/variable to avoid lint noise and keep the command minimal.

Suggested change
import logging
from django.core.management.base import BaseCommand
from ami.main.checks import check_occurrences
logger = logging.getLogger(__name__)
from django.core.management.base import BaseCommand
from ami.main.checks import check_occurrences

Copilot uses AI. Check for mistakes.
class Command(BaseCommand):
    help = "Check occurrence data integrity and optionally fix issues"

    def add_arguments(self, parser):
        parser.add_argument(
            "--project-id",
            type=int,
            default=None,
            help="Scope to a single project ID",
        )
        parser.add_argument(
            "--fix",
            action="store_true",
            help="Auto-fix issues (missing determinations, orphaned occurrences)",
        )

    def handle(self, *args, **options):
        """Run the integrity checks, print per-check results and a summary footer."""
        project_id = options["project_id"]
        fix = options["fix"]

        scope = f"project {project_id}" if project_id else "all projects"
        self.stdout.write(f"Checking occurrence integrity for {scope}...")

        report = check_occurrences(project_id=project_id, fix=fix)

        # Issues left unresolved after any attempted fixes (orphaned detections
        # are never auto-fixed; the other categories may be partially repaired).
        remaining_missing = max(len(report.missing_determination) - report.fixed_determinations, 0)
        remaining_orphaned = max(len(report.orphaned_occurrences) - report.deleted_occurrences, 0)
        remaining_issues = remaining_missing + remaining_orphaned + len(report.orphaned_detections)

        # Missing determination
        label = "Missing determination"
        count = len(report.missing_determination)
        if fix and count:
            # In --fix mode always show the fixed count (including 0) so
            # operators can tell whether anything was actually repaired.
            self.stdout.write(f" {label}: {count} found, {report.fixed_determinations} fixed")
        elif count:
            self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
        else:
            self.stdout.write(f" {label}: 0")

        # Orphaned occurrences
        label = "Orphaned occurrences"
        count = len(report.orphaned_occurrences)
        if fix and count:
            self.stdout.write(f" {label}: {count} found, {report.deleted_occurrences} deleted")
        elif count:
            self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
        else:
            self.stdout.write(f" {label}: 0")

        # Orphaned detections (report-only; never auto-fixed)
        label = "Orphaned detections"
        count = len(report.orphaned_detections)
        if count:
            self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
        else:
            self.stdout.write(f" {label}: 0")

        # Summary footer: never report success while issues remain.
        if report.has_issues and not fix:
            self.stdout.write(self.style.NOTICE("\nRun with --fix to repair fixable issues."))
        elif fix and remaining_issues:
            self.stdout.write(
                self.style.WARNING(
                    f"\nDone. Applied fixes, but {remaining_issues} issue(s) still require attention."
                )
            )
        elif fix and report.has_issues:
            self.stdout.write(self.style.SUCCESS("\nDone. All fixable issues were repaired."))
        else:
            self.stdout.write(self.style.SUCCESS("\nNo issues found."))
Comment on lines +33 to +69
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't end --fix runs with a success footer when issues remain.

Orphaned detections are never auto-fixed, and the other two categories can be only partially repaired. This branch still prints SUCCESS, so the command can look clean even when the counts above show unresolved problems.

🛠️ Suggested fix
         report = check_occurrences(project_id=project_id, fix=fix)
+        remaining_missing = max(len(report.missing_determination) - report.fixed_determinations, 0)
+        remaining_orphaned_occurrences = max(len(report.orphaned_occurrences) - report.deleted_occurrences, 0)
+        remaining_issues = remaining_missing + remaining_orphaned_occurrences + len(report.orphaned_detections)
@@
-        elif report.has_issues and fix:
-            self.stdout.write(self.style.SUCCESS("\nDone. Applied fixes."))
+        elif fix and remaining_issues:
+            self.stdout.write(
+                self.style.WARNING(
+                    f"\nDone. Applied fixes, but {remaining_issues} issue(s) still require attention."
+                )
+            )
+        elif fix:
+            self.stdout.write(self.style.SUCCESS("\nDone. All fixable issues were repaired."))
         else:
             self.stdout.write(self.style.SUCCESS("\nNo issues found."))
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
report = check_occurrences(project_id=project_id, fix=fix)
# Missing determination
label = "Missing determination"
count = len(report.missing_determination)
if fix and report.fixed_determinations:
self.stdout.write(f" {label}: {count} found, {report.fixed_determinations} fixed")
elif count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Orphaned occurrences
label = "Orphaned occurrences"
count = len(report.orphaned_occurrences)
if fix and report.deleted_occurrences:
self.stdout.write(f" {label}: {count} found, {report.deleted_occurrences} deleted")
elif count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Orphaned detections
label = "Orphaned detections"
count = len(report.orphaned_detections)
if count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Summary
if report.has_issues and not fix:
self.stdout.write(self.style.NOTICE("\nRun with --fix to repair fixable issues."))
elif report.has_issues and fix:
self.stdout.write(self.style.SUCCESS("\nDone. Applied fixes."))
else:
self.stdout.write(self.style.SUCCESS("\nNo issues found."))
report = check_occurrences(project_id=project_id, fix=fix)
remaining_missing = max(len(report.missing_determination) - report.fixed_determinations, 0)
remaining_orphaned_occurrences = max(len(report.orphaned_occurrences) - report.deleted_occurrences, 0)
remaining_issues = remaining_missing + remaining_orphaned_occurrences + len(report.orphaned_detections)
# Missing determination
label = "Missing determination"
count = len(report.missing_determination)
if fix and report.fixed_determinations:
self.stdout.write(f" {label}: {count} found, {report.fixed_determinations} fixed")
elif count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Orphaned occurrences
label = "Orphaned occurrences"
count = len(report.orphaned_occurrences)
if fix and report.deleted_occurrences:
self.stdout.write(f" {label}: {count} found, {report.deleted_occurrences} deleted")
elif count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Orphaned detections
label = "Orphaned detections"
count = len(report.orphaned_detections)
if count:
self.stdout.write(self.style.WARNING(f" {label}: {count} found"))
else:
self.stdout.write(f" {label}: 0")
# Summary
if report.has_issues and not fix:
self.stdout.write(self.style.NOTICE("\nRun with --fix to repair fixable issues."))
elif fix and remaining_issues:
self.stdout.write(
self.style.WARNING(
f"\nDone. Applied fixes, but {remaining_issues} issue(s) still require attention."
)
)
elif fix:
self.stdout.write(self.style.SUCCESS("\nDone. All fixable issues were repaired."))
else:
self.stdout.write(self.style.SUCCESS("\nNo issues found."))
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@ami/main/management/commands/check_occurrences.py` around lines 33 - 69, The
summary footer currently prints SUCCESS on --fix runs even when unresolved
issues remain; update the final summary logic to compute remaining issues after
attempted fixes (e.g. remaining = max(0, len(report.missing_determination) -
(report.fixed_determinations or 0)) + max(0, len(report.orphaned_occurrences) -
(report.deleted_occurrences or 0)) + len(report.orphaned_detections)) and then:
if remaining > 0 print a NOTICE that unresolved issues remain (instead of
SUCCESS), if fix is true and remaining == 0 print SUCCESS ("Done. Applied
fixes."), if not fix and report.has_issues keep the existing NOTICE prompt,
otherwise print SUCCESS ("No issues found."). Use the existing symbols report,
fix, report.fixed_determinations, report.deleted_occurrences,
report.missing_determination, report.orphaned_occurrences,
report.orphaned_detections, and report.has_issues to implement this.

18 changes: 18 additions & 0 deletions ami/main/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging

from config import celery_app

logger = logging.getLogger(__name__)


@celery_app.task()
def check_occurrences_task():
    """Periodic occurrence integrity check. Report-only, logs warnings."""
    from ami.main.checks import check_occurrences

    report = check_occurrences(fix=False)
    if not report.has_issues:
        logger.info("Occurrence integrity check passed")
    else:
        logger.warning("Occurrence integrity issues: %s", report.summary)
    return report.summary
128 changes: 128 additions & 0 deletions ami/main/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from ami.exports.models import DataExport
from ami.jobs.models import VALID_JOB_TYPES, Job
from ami.main.checks import check_occurrences
from ami.main.models import (
Classification,
Deployment,
Expand Down Expand Up @@ -3744,3 +3745,130 @@ def test_list_pipelines_public_project_non_member(self):
self.client.force_authenticate(user=non_member)
response = self.client.get(url)
self.assertEqual(response.status_code, status.HTTP_200_OK)


class TestCheckOccurrences(TestCase):
    def setUp(self):
        self.project = Project.objects.create(name="Integrity Test Project")
        self.deployment = Deployment.objects.create(name="Test Deployment", project=self.project)
        self.event = Event.objects.create(
            deployment=self.deployment,
            project=self.project,
            start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc),
            # Event.group_by is required (non-null, no default); use a
            # deterministic value derived from the start date.
            group_by="2024-01-01",
        )
        self.taxon = Taxon.objects.create(name="Test Species", rank=TaxonRank.SPECIES)
        self.source_image = SourceImage.objects.create(
            deployment=self.deployment,
            event=self.event,
        )

    def _create_occurrence_with_classification(self, determination=None):
        """Helper: create occurrence -> detection -> classification chain."""
        occurrence = Occurrence.objects.create(
            project=self.project,
            event=self.event,
            deployment=self.deployment,
            determination=determination,
        )
        detection = Detection.objects.create(
            source_image=self.source_image,
            occurrence=occurrence,
        )
        Classification.objects.create(
            detection=detection,
            taxon=self.taxon,
            score=0.9,
            terminal=True,
            # Classification.timestamp is non-nullable; reuse the event start.
            timestamp=self.event.start,
        )
        return occurrence

    def test_no_issues(self):
        """Clean data should report no issues."""
        self._create_occurrence_with_classification(determination=self.taxon)
        report = check_occurrences(project_id=self.project.pk)
        self.assertFalse(report.has_issues)
        self.assertEqual(len(report.missing_determination), 0)
        self.assertEqual(len(report.orphaned_occurrences), 0)
        self.assertEqual(len(report.orphaned_detections), 0)

    def test_missing_determination_detected(self):
        """Occurrence with classification but null determination should be flagged."""
        occurrence = self._create_occurrence_with_classification(determination=None)
        # Force determination to None (save() would auto-set it)
        Occurrence.objects.filter(pk=occurrence.pk).update(determination=None)

        report = check_occurrences(project_id=self.project.pk)
        self.assertTrue(report.has_issues)
        self.assertIn(occurrence.pk, report.missing_determination)
        self.assertEqual(report.fixed_determinations, 0)

    def test_missing_determination_fixed(self):
        """With fix=True, missing determination should be repaired."""
        occurrence = self._create_occurrence_with_classification(determination=None)
        Occurrence.objects.filter(pk=occurrence.pk).update(determination=None)

        report = check_occurrences(project_id=self.project.pk, fix=True)
        self.assertEqual(report.fixed_determinations, 1)

        occurrence.refresh_from_db()
        self.assertIsNotNone(occurrence.determination)

    def test_orphaned_occurrence_detected(self):
        """Occurrence with no detections should be flagged."""
        orphan = Occurrence.objects.create(
            project=self.project,
            event=self.event,
            deployment=self.deployment,
        )
        report = check_occurrences(project_id=self.project.pk)
        self.assertIn(orphan.pk, report.orphaned_occurrences)

    def test_orphaned_occurrence_fixed(self):
        """With fix=True, orphaned occurrences should be deleted."""
        orphan = Occurrence.objects.create(
            project=self.project,
            event=self.event,
            deployment=self.deployment,
        )
        report = check_occurrences(project_id=self.project.pk, fix=True)
        self.assertEqual(report.deleted_occurrences, 1)
        self.assertFalse(Occurrence.objects.filter(pk=orphan.pk).exists())

    def test_orphaned_detection_detected(self):
        """Detection with no occurrence should be flagged."""
        det = Detection.objects.create(
            source_image=self.source_image,
            occurrence=None,
        )
        report = check_occurrences(project_id=self.project.pk)
        self.assertIn(det.pk, report.orphaned_detections)

    def test_project_filter(self):
        """Issues in other projects should not be reported."""
        other_project = Project.objects.create(name="Other Project")
        other_deployment = Deployment.objects.create(name="Other Dep", project=other_project)
        other_event = Event.objects.create(
            deployment=other_deployment,
            project=other_project,
            start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc),
            # group_by is required here as well.
            group_by="2024-01-01",
        )
        Occurrence.objects.create(
            project=other_project,
            event=other_event,
            deployment=other_deployment,
        )  # orphaned in other project

        report = check_occurrences(project_id=self.project.pk)
        self.assertEqual(len(report.orphaned_occurrences), 0)

    def test_report_summary(self):
        """Summary should be a non-empty string when issues exist."""
        Occurrence.objects.create(
            project=self.project,
            event=self.event,
            deployment=self.deployment,
        )
        report = check_occurrences(project_id=self.project.pk)
        self.assertTrue(report.has_issues)
        self.assertIsInstance(report.summary, str)
        self.assertGreater(len(report.summary), 0)
Loading
Loading