Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete expired access logs #5097

Merged
merged 9 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dependencies/pip/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ modilabs-python-utils==0.1.5
# via -r dependencies/pip/requirements.in
mongomock==4.1.2
# via -r dependencies/pip/dev_requirements.in
more-itertools==10.4.0
# via -r dependencies/pip/requirements.in
multidict==6.0.5
# via
# aiohttp
Expand Down
1 change: 1 addition & 0 deletions dependencies/pip/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ jsonfield
jsonschema
kombu
lxml
more-itertools
oauthlib
openpyxl
#py-gfm # Incompatible with markdown 3.x
Expand Down
2 changes: 2 additions & 0 deletions dependencies/pip/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ markupsafe==2.1.5
# via werkzeug
modilabs-python-utils==0.1.5
# via -r dependencies/pip/requirements.in
more-itertools==10.4.0
# via -r dependencies/pip/requirements.in
multidict==6.0.5
# via
# aiohttp
Expand Down
45 changes: 45 additions & 0 deletions kobo/apps/audit_log/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import timedelta

from constance import config
from django.conf import settings
from django.utils import timezone
from more_itertools import chunked

from kobo.apps.audit_log.models import (
AccessLog,
AuditLog,
)
from kobo.celery import celery_app
from kpi.utils.log import logging


@celery_app.task()
def batch_delete_audit_logs_by_id(ids):
logs = AuditLog.objects.filter(id__in=ids)
count, _ = logs.delete()
logging.info(f'Deleted {count} audit logs from database')


@celery_app.task()
def spawn_access_log_cleaning_tasks():
"""
Enqueue tasks to delete access logs older than ACCESS_LOG_LIFESPAN days old.

ACCESS_LOG_LIFESPAN is configured via constance.
Ids are batched into multiple tasks.
"""

expiration_date = timezone.now() - timedelta(
days=config.ACCESS_LOG_LIFESPAN
)

expired_logs = (
AccessLog.objects.filter(date_created__lt=expiration_date)
.values_list('id', flat=True)
.iterator()
)
for id_batch in chunked(
expired_logs, settings.ACCESS_LOG_DELETION_BATCH_SIZE
):
# queue up a new task for each batch of expired ids
batch_delete_audit_logs_by_id.delay(ids=id_batch)
96 changes: 96 additions & 0 deletions kobo/apps/audit_log/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from datetime import timedelta
from unittest.mock import patch

from constance.test import override_config
from django.test import override_settings
from django.utils import timezone

from kobo.apps.audit_log.models import AccessLog
from kobo.apps.audit_log.tasks import (
batch_delete_audit_logs_by_id,
spawn_access_log_cleaning_tasks,
)
from kobo.apps.kobo_auth.shortcuts import User
from kpi.tests.base_test_case import BaseTestCase


@override_config(ACCESS_LOG_LIFESPAN=1)
class AuditLogTasksTestCase(BaseTestCase):

fixtures = ['test_data']

def test_spawn_deletion_task_identifies_expired_logs(self):
"""
Test the spawning task correctly identifies which logs to delete.

Separated for easier debugging of the spawning vs deleting steps
"""
user = User.objects.get(username='someuser')
old_log = AccessLog.objects.create(
user=user,
date_created=timezone.now() - timedelta(days=1, hours=1),
)
older_log = AccessLog.objects.create(
user=user,
date_created=timezone.now() - timedelta(days=2)
)
new_log = AccessLog.objects.create(user=user)

with patch(
'kobo.apps.audit_log.tasks.batch_delete_audit_logs_by_id.delay'
) as patched_spawned_task:
spawn_access_log_cleaning_tasks()

# get the list of ids passed for any call to the actual deletion task
id_lists = [kwargs['ids'] for _, _, kwargs in patched_spawned_task.mock_calls]
# flatten the list
all_deleted_ids = [log_id for id_list in id_lists for log_id in id_list]
self.assertIn(old_log.id, all_deleted_ids)
self.assertIn(older_log.id, all_deleted_ids)
self.assertNotIn(new_log.id, all_deleted_ids)

@override_settings(ACCESS_LOG_DELETION_BATCH_SIZE=2)
def test_spawn_task_batches_ids(self):
three_days_ago = timezone.now() - timedelta(days=3)
user = User.objects.get(username='someuser')
old_log_1 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)
old_log_2 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)
old_log_3 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)

with patch(
'kobo.apps.audit_log.tasks.batch_delete_audit_logs_by_id.delay'
) as patched_spawned_task:
spawn_access_log_cleaning_tasks()

# Should be 2 batches
self.assertEqual(patched_spawned_task.call_count, 2)
# make sure all batches were <= ACCESS_LOG_DELETION_BATCH_SIZE
all_deleted_ids = []
for task_call in patched_spawned_task.mock_calls:
_, _, kwargs = task_call
id_list = kwargs['ids']
self.assertLessEqual(len(id_list), 2)
all_deleted_ids.extend(id_list)

# make sure we queued everything for deletion
self.assertIn(old_log_1.id, all_deleted_ids)
self.assertIn(old_log_2.id, all_deleted_ids)
self.assertIn(old_log_3.id, all_deleted_ids)

def test_batch_delete_audit_logs_by_id(self):
user = User.objects.get(username='someuser')
log_1 = AccessLog.objects.create(user=user)
log_2 = AccessLog.objects.create(user=user)
log_3 = AccessLog.objects.create(user=user)
self.assertEqual(AccessLog.objects.count(), 3)

batch_delete_audit_logs_by_id(ids=[log_1.id, log_2.id])
# only log_3 should remain
self.assertEqual(AccessLog.objects.count(), 1)
self.assertEqual(AccessLog.objects.first().id, log_3.id)
13 changes: 13 additions & 0 deletions kobo/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@
),
'Email message to sent to admins on failure.',
),
'ACCESS_LOG_LIFESPAN': (
60,
'Length of time in days to keep access logs.',
'positive_int'
)
}

CONSTANCE_ADDITIONAL_FIELDS = {
Expand Down Expand Up @@ -649,6 +654,7 @@
'EXPOSE_GIT_REV',
'FRONTEND_MIN_RETRY_TIME',
'FRONTEND_MAX_RETRY_TIME',
'ACCESS_LOG_LIFESPAN',
),
'Rest Services': (
'ALLOW_UNSECURED_HOOK_ENDPOINTS',
Expand Down Expand Up @@ -1219,6 +1225,11 @@ def dj_stripe_request_callback_method():
'schedule': crontab(minute=0, hour=0),
'options': {'queue': 'kpi_low_priority_queue'}
},
'delete-expired-access-logs': {
'task': 'kobo.apps.audit_log.tasks.spawn_access_log_cleaning_tasks',
'schedule': crontab(minute=0, hour=0),
'options': {'queue': 'kpi_low_priority_queue'}
}
}


Expand Down Expand Up @@ -1765,6 +1776,8 @@ def dj_stripe_request_callback_method():
'application/x-zip-compressed'
]

ACCESS_LOG_DELETION_BATCH_SIZE = 1000

# Silence Django Guardian warning. Authentication backend is hooked, but
# Django Guardian does not recognize it because it is extended
SILENCED_SYSTEM_CHECKS = ['guardian.W001']
Expand Down
Loading