-
Notifications
You must be signed in to change notification settings - Fork 12
feat: add check_occurrences for occurrence data integrity #1188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5301de3
58971f5
9e2bb10
103dce5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,102 @@ | ||||||||||||||||
| import logging | ||||||||||||||||
| from dataclasses import dataclass, field | ||||||||||||||||
|
|
||||||||||||||||
| from django.db.models import Count | ||||||||||||||||
|
|
||||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| @dataclass | ||||||||||||||||
| class OccurrenceCheckReport: | ||||||||||||||||
| missing_determination: list[int] = field(default_factory=list) | ||||||||||||||||
| orphaned_occurrences: list[int] = field(default_factory=list) | ||||||||||||||||
| orphaned_detections: list[int] = field(default_factory=list) | ||||||||||||||||
| fixed_determinations: int = 0 | ||||||||||||||||
| deleted_occurrences: int = 0 | ||||||||||||||||
|
|
||||||||||||||||
| @property | ||||||||||||||||
| def has_issues(self) -> bool: | ||||||||||||||||
| return bool(self.missing_determination or self.orphaned_occurrences or self.orphaned_detections) | ||||||||||||||||
|
|
||||||||||||||||
| @property | ||||||||||||||||
| def summary(self) -> str: | ||||||||||||||||
| parts = [] | ||||||||||||||||
| if self.missing_determination: | ||||||||||||||||
| s = f"{len(self.missing_determination)} missing determination" | ||||||||||||||||
| if self.fixed_determinations: | ||||||||||||||||
| s += f" ({self.fixed_determinations} fixed)" | ||||||||||||||||
| parts.append(s) | ||||||||||||||||
| if self.orphaned_occurrences: | ||||||||||||||||
| s = f"{len(self.orphaned_occurrences)} orphaned occurrences" | ||||||||||||||||
| if self.deleted_occurrences: | ||||||||||||||||
| s += f" ({self.deleted_occurrences} deleted)" | ||||||||||||||||
| parts.append(s) | ||||||||||||||||
| if self.orphaned_detections: | ||||||||||||||||
| parts.append(f"{len(self.orphaned_detections)} orphaned detections") | ||||||||||||||||
| return ", ".join(parts) if parts else "No issues found" | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| def check_occurrences( | ||||||||||||||||
| project_id: int | None = None, | ||||||||||||||||
| fix: bool = False, | ||||||||||||||||
| ) -> OccurrenceCheckReport: | ||||||||||||||||
| """ | ||||||||||||||||
| Check occurrence data integrity and optionally fix issues. | ||||||||||||||||
|
|
||||||||||||||||
| Args: | ||||||||||||||||
| project_id: Scope to a single project. None = all projects. | ||||||||||||||||
| fix: If True, auto-fix what can be fixed. If False (default), report only. | ||||||||||||||||
|
|
||||||||||||||||
| Returns: | ||||||||||||||||
| OccurrenceCheckReport with findings and fix counts. | ||||||||||||||||
| """ | ||||||||||||||||
| from ami.main.models import Detection, Occurrence, update_occurrence_determination | ||||||||||||||||
|
|
||||||||||||||||
| report = OccurrenceCheckReport() | ||||||||||||||||
|
|
||||||||||||||||
| # Base querysets scoped by project | ||||||||||||||||
| occ_qs = Occurrence.objects.all() | ||||||||||||||||
| det_qs = Detection.objects.all() | ||||||||||||||||
| if project_id is not None: | ||||||||||||||||
| occ_qs = occ_qs.filter(project_id=project_id) | ||||||||||||||||
| det_qs = det_qs.filter(source_image__deployment__project_id=project_id) | ||||||||||||||||
|
|
||||||||||||||||
| # Check 1: Missing determination | ||||||||||||||||
| # Occurrences with classifications but no determination set | ||||||||||||||||
| missing = occ_qs.filter( | ||||||||||||||||
| determination__isnull=True, | ||||||||||||||||
| detections__classifications__isnull=False, | ||||||||||||||||
| ).distinct() | ||||||||||||||||
| report.missing_determination = list(missing.values_list("pk", flat=True)) | ||||||||||||||||
|
|
||||||||||||||||
| if fix and report.missing_determination: | ||||||||||||||||
| for occ in missing.iterator(): | ||||||||||||||||
| if update_occurrence_determination(occ, current_determination=None, save=True): | ||||||||||||||||
| report.fixed_determinations += 1 | ||||||||||||||||
| logger.info( | ||||||||||||||||
| "Fixed %d/%d missing determinations", | ||||||||||||||||
| report.fixed_determinations, | ||||||||||||||||
| len(report.missing_determination), | ||||||||||||||||
| ) | ||||||||||||||||
|
Comment on lines
+72
to
+80
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep one bad occurrence from aborting the whole repair pass.
🛠️ Suggested fix if fix and report.missing_determination:
for occ in missing.iterator():
- if update_occurrence_determination(occ, current_determination=None, save=True):
- report.fixed_determinations += 1
+ try:
+ if update_occurrence_determination(occ, current_determination=None, save=True):
+ report.fixed_determinations += 1
+ except Exception:
+ logger.exception("Failed to fix missing determination for occurrence %s", occ.pk)
logger.info(
"Fixed %d/%d missing determinations",
report.fixed_determinations,
len(report.missing_determination),
)🤖 Prompt for AI Agents |
||||||||||||||||
|
|
||||||||||||||||
| # Check 2: Orphaned occurrences (no detections) | ||||||||||||||||
| orphaned_occ = occ_qs.annotate(det_count=Count("detections")).filter(det_count=0) | ||||||||||||||||
| report.orphaned_occurrences = list(orphaned_occ.values_list("pk", flat=True)) | ||||||||||||||||
|
|
||||||||||||||||
| if fix and report.orphaned_occurrences: | ||||||||||||||||
| deleted_count, _ = orphaned_occ.delete() | ||||||||||||||||
| report.deleted_occurrences = deleted_count | ||||||||||||||||
| logger.info("Deleted %d orphaned occurrences", deleted_count) | ||||||||||||||||
|
Comment on lines
+87
to
+89
|
||||||||||||||||
| deleted_count, _ = orphaned_occ.delete() | |
| report.deleted_occurrences = deleted_count | |
| logger.info("Deleted %d orphaned occurrences", deleted_count) | |
| deleted_total, per_model_counts = orphaned_occ.delete() | |
| deleted_occurrences = per_model_counts.get(Occurrence._meta.label, 0) | |
| report.deleted_occurrences = deleted_occurrences | |
| logger.info("Deleted %d orphaned occurrences", deleted_occurrences) |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,69 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import logging | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from django.core.management.base import BaseCommand | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from ami.main.checks import check_occurrences | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+1
to
+9
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import logging | |
| from django.core.management.base import BaseCommand | |
| from ami.main.checks import check_occurrences | |
| logger = logging.getLogger(__name__) | |
| from django.core.management.base import BaseCommand | |
| from ami.main.checks import check_occurrences |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In --fix mode, the output only shows “found, fixed/deleted” when the fixed/deleted count is non-zero; if fixes were attempted but none were applied (or only partially applied), the output degrades to the same “X found” warning as report-only mode. Consider always printing the fixed/deleted counts when --fix is set (including 0), and optionally highlighting when fixed != found so operators can tell whether anything was actually repaired.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't end --fix runs with a success footer when issues remain.
Orphaned detections are never auto-fixed, and the other two categories can be only partially repaired. This branch still prints SUCCESS, so the command can look clean even when the counts above show unresolved problems.
🛠️ Suggested fix
report = check_occurrences(project_id=project_id, fix=fix)
+ remaining_missing = max(len(report.missing_determination) - report.fixed_determinations, 0)
+ remaining_orphaned_occurrences = max(len(report.orphaned_occurrences) - report.deleted_occurrences, 0)
+ remaining_issues = remaining_missing + remaining_orphaned_occurrences + len(report.orphaned_detections)
@@
- elif report.has_issues and fix:
- self.stdout.write(self.style.SUCCESS("\nDone. Applied fixes."))
+ elif fix and remaining_issues:
+ self.stdout.write(
+ self.style.WARNING(
+ f"\nDone. Applied fixes, but {remaining_issues} issue(s) still require attention."
+ )
+ )
+ elif fix:
+ self.stdout.write(self.style.SUCCESS("\nDone. All fixable issues were repaired."))
else:
self.stdout.write(self.style.SUCCESS("\nNo issues found."))📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| report = check_occurrences(project_id=project_id, fix=fix) | |
| # Missing determination | |
| label = "Missing determination" | |
| count = len(report.missing_determination) | |
| if fix and report.fixed_determinations: | |
| self.stdout.write(f" {label}: {count} found, {report.fixed_determinations} fixed") | |
| elif count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Orphaned occurrences | |
| label = "Orphaned occurrences" | |
| count = len(report.orphaned_occurrences) | |
| if fix and report.deleted_occurrences: | |
| self.stdout.write(f" {label}: {count} found, {report.deleted_occurrences} deleted") | |
| elif count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Orphaned detections | |
| label = "Orphaned detections" | |
| count = len(report.orphaned_detections) | |
| if count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Summary | |
| if report.has_issues and not fix: | |
| self.stdout.write(self.style.NOTICE("\nRun with --fix to repair fixable issues.")) | |
| elif report.has_issues and fix: | |
| self.stdout.write(self.style.SUCCESS("\nDone. Applied fixes.")) | |
| else: | |
| self.stdout.write(self.style.SUCCESS("\nNo issues found.")) | |
| report = check_occurrences(project_id=project_id, fix=fix) | |
| remaining_missing = max(len(report.missing_determination) - report.fixed_determinations, 0) | |
| remaining_orphaned_occurrences = max(len(report.orphaned_occurrences) - report.deleted_occurrences, 0) | |
| remaining_issues = remaining_missing + remaining_orphaned_occurrences + len(report.orphaned_detections) | |
| # Missing determination | |
| label = "Missing determination" | |
| count = len(report.missing_determination) | |
| if fix and report.fixed_determinations: | |
| self.stdout.write(f" {label}: {count} found, {report.fixed_determinations} fixed") | |
| elif count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Orphaned occurrences | |
| label = "Orphaned occurrences" | |
| count = len(report.orphaned_occurrences) | |
| if fix and report.deleted_occurrences: | |
| self.stdout.write(f" {label}: {count} found, {report.deleted_occurrences} deleted") | |
| elif count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Orphaned detections | |
| label = "Orphaned detections" | |
| count = len(report.orphaned_detections) | |
| if count: | |
| self.stdout.write(self.style.WARNING(f" {label}: {count} found")) | |
| else: | |
| self.stdout.write(f" {label}: 0") | |
| # Summary | |
| if report.has_issues and not fix: | |
| self.stdout.write(self.style.NOTICE("\nRun with --fix to repair fixable issues.")) | |
| elif fix and remaining_issues: | |
| self.stdout.write( | |
| self.style.WARNING( | |
| f"\nDone. Applied fixes, but {remaining_issues} issue(s) still require attention." | |
| ) | |
| ) | |
| elif fix: | |
| self.stdout.write(self.style.SUCCESS("\nDone. All fixable issues were repaired.")) | |
| else: | |
| self.stdout.write(self.style.SUCCESS("\nNo issues found.")) |
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.
In `@ami/main/management/commands/check_occurrences.py` around lines 33 - 69, The
summary footer currently prints SUCCESS on --fix runs even when unresolved
issues remain; update the final summary logic to compute remaining issues after
attempted fixes (e.g. remaining = max(0, len(report.missing_determination) -
(report.fixed_determinations or 0)) + max(0, len(report.orphaned_occurrences) -
(report.deleted_occurrences or 0)) + len(report.orphaned_detections)) and then:
if remaining > 0 print a NOTICE that unresolved issues remain (instead of
SUCCESS), if fix is true and remaining == 0 print SUCCESS ("Done. Applied
fixes."), if not fix and report.has_issues keep the existing NOTICE prompt,
otherwise print SUCCESS ("No issues found."). Use the existing symbols report,
fix, report.fixed_determinations, report.deleted_occurrences,
report.missing_determination, report.orphaned_occurrences,
report.orphaned_detections, and report.has_issues to implement this.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| import logging | ||
|
|
||
| from config import celery_app | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| @celery_app.task() | ||
| def check_occurrences_task(): | ||
| """Periodic occurrence integrity check. Report-only, logs warnings.""" | ||
| from ami.main.checks import check_occurrences | ||
|
|
||
| report = check_occurrences(fix=False) | ||
| if report.has_issues: | ||
| logger.warning("Occurrence integrity issues: %s", report.summary) | ||
| else: | ||
| logger.info("Occurrence integrity check passed") | ||
| return report.summary |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |||||||
|
|
||||||||
| from ami.exports.models import DataExport | ||||||||
| from ami.jobs.models import VALID_JOB_TYPES, Job | ||||||||
| from ami.main.checks import check_occurrences | ||||||||
| from ami.main.models import ( | ||||||||
| Classification, | ||||||||
| Deployment, | ||||||||
|
|
@@ -3744,3 +3745,130 @@ def test_list_pipelines_public_project_non_member(self): | |||||||
| self.client.force_authenticate(user=non_member) | ||||||||
| response = self.client.get(url) | ||||||||
| self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||||||
|
|
||||||||
|
|
||||||||
| class TestCheckOccurrences(TestCase): | ||||||||
| def setUp(self): | ||||||||
| self.project = Project.objects.create(name="Integrity Test Project") | ||||||||
| self.deployment = Deployment.objects.create(name="Test Deployment", project=self.project) | ||||||||
| self.event = Event.objects.create( | ||||||||
| deployment=self.deployment, | ||||||||
| project=self.project, | ||||||||
| start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc), | ||||||||
|
||||||||
| start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc), | |
| start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc), | |
| group_by="2024-01-01", |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Classification.timestamp is non-nullable. This test helper creates a Classification without a timestamp, which will fail at runtime. Provide a timestamp (e.g., from the source image/event start) when creating the classification.
| terminal=True, | |
| terminal=True, | |
| timestamp=self.event.start, |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same issue as above: Event.group_by is required. This other_event creation in the project filter test omits group_by and will error. Add a group_by value here as well.
| start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc), | |
| start=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc), | |
| group_by=self.event.group_by, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When scoping detections by project, this uses
source_image__deployment__project_id. Elsewhere in the codebase project scoping is typically done viadetection__source_image__project_id/source_image__project_id(andSourceImage.save()backfillsprojectfromdeployment). Usingsource_image__project_idhere would be more consistent and avoids edge cases if deployment/project ever diverge.