Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cleanup jobs as Celery periodic tasks #579

Open
wants to merge 21 commits into
base: beta
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2a70d49
Draft - script to detect which attachment files are orphans
noliveleger Oct 9, 2018
4bc9397
Checks exports too
noliveleger Oct 11, 2018
d934229
Merge branch 'master' into orphans_cleaner
noliveleger Apr 17, 2019
a00beae
Delete file when it's not detected in DB
noliveleger Apr 17, 2019
d441be6
Delete all files with their previous versions
noliveleger Apr 17, 2019
208d82e
Copy lock mechanism from kpi
noliveleger Oct 1, 2019
2db51ac
Merge branch 'orphans_cleaner' into lock-decorator-redis
noliveleger Oct 2, 2019
470a554
Merge branch '2155_kpi_two_databases' into lock-decorator-redis
noliveleger Oct 2, 2019
cf5c5f6
Standardize remove command names and add these commands as periodic t…
noliveleger Oct 2, 2019
68269ec
Force keep kpi revisions
noliveleger Oct 4, 2019
921e822
Do not delete reversion from Asset model
noliveleger Oct 7, 2019
f39ea41
Merge branch 'master' into cron-cleanup-jobs
noliveleger May 14, 2020
6b5160d
Merge branch 'beta' into cron-cleanup-jobs
noliveleger Feb 18, 2022
9f010b3
Support Filesystem Storage
noliveleger Feb 18, 2022
3508cff
utc-timezone
noliveleger Mar 10, 2022
2aeccfc
Use UTC as timezone
noliveleger Mar 10, 2022
12d0a40
Merge branch 'beta' into cron-cleanup-jobs
noliveleger Mar 10, 2022
0cf1c28
Add customizable session prefix for redis, move settings to base
noliveleger Mar 10, 2022
24435a8
Merge branch 'beta' into cron-cleanup-jobs
noliveleger Mar 10, 2022
1d01d90
Merge branch 'session-with-custom-prefix' into cron-cleanup-jobs
noliveleger Mar 10, 2022
98b89e8
Merge branch 'utc-timezone' into cron-cleanup-jobs
noliveleger Mar 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions onadata/apps/api/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import re
import time
from datetime import datetime
from urllib.parse import unquote

import requests
import rest_framework.views as rest_framework_views
Expand All @@ -31,8 +30,6 @@
from onadata.libs.utils.logger_tools import (
publish_form,
response_with_mimetype_and_name,
OPEN_ROSA_VERSION_HEADER,
OPEN_ROSA_VERSION,
)
from onadata.libs.utils.user_auth import (
check_and_set_form_by_id,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,40 +1,36 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 fileencoding=utf-8
# coding: utf-8
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Sum
from django.db.models.aggregates import Count
from django.utils import timezone

from onadata.apps.logger.models.attachment import Attachment
from onadata.apps.logger.models.instance import Instance
from onadata.apps.viewer.models.parsed_instance import ParsedInstance
from onadata.apps.logger.models.xform import XForm
from onadata.libs.utils.common_tags import MONGO_STRFTIME


class Command(BaseCommand):

help = "Deletes duplicated submissions (i.e same `uuid` and same `xml`)"
help = "Removes duplicated submissions (i.e same `uuid` and same `xml`)"

def __init__(self, **kwargs):
super().__init__(**kwargs)
self.__vaccuum = False
self.__vacuum = False
self.__users = set([])

def add_arguments(self, parser):
super().add_arguments(parser)

parser.add_argument(
"--user",
'--user',
default=None,
help="Specify a username to clean up only their forms",
help='Specify a username to clean up only their forms',
)

parser.add_argument(
"--xform",
'--xform',
default=None,
help="Specify a XForm's `id_string` to clean up only this form",
)
Expand All @@ -51,18 +47,20 @@ def handle(self, *args, **options):
if username:
query = query.filter(xform__user__username=username)

query = query.values_list('uuid', flat=True)\
.annotate(count_uuid=Count('uuid'))\
.filter(count_uuid__gt=1)\
query = (
query.values_list('uuid', flat=True)
.annotate(count_uuid=Count('uuid'))
.filter(count_uuid__gt=1)
.distinct()
)

for uuid in query.all():

duplicated_query = Instance.objects.filter(uuid=uuid)

instances_with_same_uuid = duplicated_query.values_list('id',
'xml_hash')\
.order_by('xml_hash', 'date_created')
instances_with_same_uuid = duplicated_query.values_list(
'id', 'xml_hash'
).order_by('xml_hash', 'date_created')
xml_hash_ref = None
instance_id_ref = None

Expand All @@ -84,24 +82,26 @@ def handle(self, *args, **options):
self.__clean_up(instance_id_ref,
duplicated_instance_ids)

if not self.__vaccuum:
if not self.__vacuum:
self.stdout.write('No instances have been purged.')
else:
# Update number of submissions for each user.
for user_ in list(self.__users):
result = XForm.objects.filter(user_id=user_.id)\
.aggregate(count=Sum('num_of_submissions'))
result = XForm.objects.filter(user_id=user_.id).aggregate(
count=Sum('num_of_submissions')
)
user_.profile.num_of_submissions = result['count']
self.stdout.write(
"\tUpdating `{}`'s number of submissions".format(
user_.username))
f"\tUpdating `{user_.username}`'s number of submissions"
)
user_.profile.save(update_fields=['num_of_submissions'])
self.stdout.write(
'\t\tDone! New number: {}'.format(result['count']))
f"\t\tDone! New number: {result['count']}"
)

def __clean_up(self, instance_id_ref, duplicated_instance_ids):
if instance_id_ref is not None and len(duplicated_instance_ids) > 0:
self.__vaccuum = True
self.__vacuum = True
with transaction.atomic():
self.stdout.write('Link attachments to instance #{}'.format(
instance_id_ref))
Expand All @@ -115,12 +115,15 @@ def __clean_up(self, instance_id_ref, duplicated_instance_ids):
.get(id=instance_id_ref)
main_instance.parsed_instance.save()

self.stdout.write('\tPurging instances: {}'.format(
duplicated_instance_ids))
Instance.objects.select_for_update()\
.filter(id__in=duplicated_instance_ids).delete()
ParsedInstance.objects.select_for_update()\
.filter(instance_id__in=duplicated_instance_ids).delete()
self.stdout.write(
'\tPurging instances: {}'.format(duplicated_instance_ids)
)
Instance.objects.select_for_update().filter(
id__in=duplicated_instance_ids
).delete()
ParsedInstance.objects.select_for_update().filter(
instance_id__in=duplicated_instance_ids
).delete()
settings.MONGO_DB.instances.remove(
{'_id': {'$in': duplicated_instance_ids}}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class Command(RevisionCommand):

help = "Deletes revisions (by chunks) for a given app [and model]"
help = "Removes revisions (by chunks) for a given app [and model]"

def add_arguments(self, parser):
super().add_arguments(parser)
Expand Down Expand Up @@ -54,8 +54,14 @@ def handle(self, *app_labels, **options):
keep_revision_ids = set()
# By default, delete nothing.
can_delete = False

# Get all revisions for the given revision manager and model.
for model in self.get_models(options):
# Force keep assets' revisions even if `self.get_models()` returns only
# registered models.
if model._meta.verbose_name == 'asset':
continue

if verbosity >= 1:
self.stdout.write("Finding stale revisions for {name}".format(
name=model._meta.verbose_name,
Expand Down
Loading