Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ TASK-163 ] Keep attachments list up-to-date when editing a submission #792

Merged
merged 19 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
4a3eb88
Update attachments list when an Instance object is saved
noliveleger Feb 17, 2022
6014a01
Block access to replaced attachments
noliveleger Feb 18, 2022
545d0c4
Merge branch 'beta' into update-submission-attachments-on-edit
noliveleger Feb 22, 2022
cc520df
Merge branch 'release/2.023.37' into update-submission-attachments-on…
noliveleger Oct 5, 2023
eb3bc03
Update counters
noliveleger Oct 6, 2023
d9ddedd
Merge branch 'release/2.023.37' into update-submission-attachments-on…
noliveleger Oct 10, 2023
8caecb3
Fix migrations conflict
noliveleger Oct 10, 2023
47fbf94
Remove useless print
noliveleger Oct 10, 2023
cf3be16
Merge branch 'release/2.023.37' into update-submission-attachments-on…
noliveleger Oct 11, 2023
8c1027c
Update id_string for user_with_dot_name_fields fixture
noliveleger Oct 11, 2023
df19a78
Only log AssertionError if instance root tag name does not match its …
noliveleger Oct 11, 2023
af2aefd
Unit test for submission attachment edit
noliveleger Oct 12, 2023
0b13cfc
Fix mutable default list in get_path()
noliveleger Oct 12, 2023
a6c719f
Manage command to soft delete orphan attachments
noliveleger Oct 13, 2023
35cdd8c
Merge branch 'release/2.023.37' into update-submission-attachments-on…
noliveleger Oct 13, 2023
10f6d3e
Fix migrations conflict
noliveleger Oct 13, 2023
e46d796
Use model manager to filter soft-deleted attachments
noliveleger Oct 16, 2023
9aa0424
Fix double subtract of soft deleted attachments
noliveleger Oct 16, 2023
a33bd2a
Add dbindex in migration
noliveleger Oct 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions onadata/apps/api/tests/viewsets/test_abstract_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def publish_xls_form(
'owner': self.user.username,
'public': False,
'public_data': False,
'description': u'transportation_2011_07_25',
'description': 'transportation_2011_07_25',
'downloadable': True,
'encrypted': False,
'id_string': u'transportation_2011_07_25',
'title': u'transportation_2011_07_25',
'id_string': 'transportation_2011_07_25',
'title': 'transportation_2011_07_25',
}

if not path:
Expand Down Expand Up @@ -192,8 +192,9 @@ def _make_submission(
forced_submission_time=None,
auth=None,
media_file=None,
use_service_account=False,
):
if auth is None:
if auth is None and not use_service_account:
auth = DigestAuth(
self.profile_data['username'], self.profile_data['password1']
)
Expand All @@ -205,6 +206,7 @@ def _make_submission(
forced_submission_time,
auth,
media_file,
use_service_account,
)

def _make_submissions(self, username=None):
Expand Down
199 changes: 198 additions & 1 deletion onadata/apps/api/tests/viewsets/test_attachment_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
from django.conf import settings
from kobo_service_account.utils import get_request_headers
from rest_framework import status
from rest_framework.reverse import reverse

from onadata.apps.api.tests.viewsets.test_abstract_viewset import \
from onadata.apps.api.tests.viewsets.test_abstract_viewset import (
TestAbstractViewSet
)
from onadata.apps.api.viewsets.attachment_viewset import AttachmentViewSet
from onadata.apps.logger.models.attachment import Attachment
from onadata.apps.main.models import UserProfile


Expand Down Expand Up @@ -249,3 +252,197 @@ def test_attachment_storage_bytes_create_instance_signal(self):
self.assertEqual(xform.attachment_storage_bytes, media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)

def test_update_attachment_on_edit(self):
data = {
'owner': self.user.username,
'public': False,
'public_data': False,
'description': 'transportation_with_attachment',
'downloadable': True,
'encrypted': False,
'id_string': 'transportation_with_attachment',
'title': 'transportation_with_attachment',
}

path = os.path.join(
settings.ONADATA_DIR,
'apps',
'main',
'tests',
'fixtures',
'transportation',
'transportation_with_attachment.xls',
)
self.publish_xls_form(data=data, path=path)

xml_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'transport_with_attachment.xml',
)
media_file_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'1335783522563.jpg'
)
user_profile = UserProfile.objects.get(user=self.xform.user)
# Submit the same XML again, but this time include the attachment
with open(media_file_path, 'rb') as media_file:
self._make_submission(xml_path, media_file=media_file)
submission_uuid = self.xform.instances.first().uuid
self.assertEqual(self.xform.instances.count(), 1)
self.assertEqual(self.xform.instances.first().uuid, submission_uuid)
media_file_size = os.path.getsize(media_file_path)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)

xml_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'transport_with_attachment_edit.xml',
)
media_file_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'IMG_2235.JPG'
)
# Edit are only allowed with service account
with open(media_file_path, 'rb') as media_file:
self._make_submission(
xml_path,
media_file=media_file,
auth=False,
use_service_account=True,
)

# Validate counters are up-to-date and instances count is still one.
self.assertEqual(self.xform.instances.count(), 1)
new_media_file_size = os.path.getsize(media_file_path)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, new_media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, new_media_file_size)
self.assertNotEqual(new_media_file_size, media_file_size)

instance = self.xform.instances.first()
attachment = instance.attachments.first()

# Validate previous attachment has been replaced but file still exists
soft_deleted_attachment_qs = Attachment.all_objects.filter(
instance=instance,
deleted_at__isnull=False
)
self.assertEqual(soft_deleted_attachment_qs.count(), 1)
soft_deleted_attachment = soft_deleted_attachment_qs.first()
self.assertEqual(
soft_deleted_attachment.media_file_basename, '1335783522563.jpg'
)
self.assertTrue(
soft_deleted_attachment.media_file.storage.exists(
str(soft_deleted_attachment.media_file)
)
)

# Validate that /api/v1/media endpoint returns the correct list
expected = {
'url': f'http://testserver/api/v1/media/{attachment.pk}',
'field_xpath': None,
'download_url': attachment.secure_url(),
'small_download_url': attachment.secure_url('small'),
'medium_download_url': attachment.secure_url('medium'),
'large_download_url': attachment.secure_url('large'),
'id': attachment.pk,
'xform': self.xform.pk,
'instance': instance.pk,
'mimetype': attachment.mimetype,
'filename': attachment.media_file.name
}
request = self.factory.get('/', **self.extra)
response = self.list_view(request, pk=attachment.pk)
self.assertEqual(response.status_code, 200)
self.assertTrue(response.data[0], expected)
self.assertTrue(len(response.data), 1)

# Validate that /api/v1/data endpoint returns the correct attachment list
# Unfortunately, attachments list differ from /api/v1/media
expected = {
'download_url': expected['download_url'],
'download_small_url': expected['small_download_url'],
'download_medium_url': expected['medium_download_url'],
'download_large_url': expected['large_download_url'],
'id': expected['id'],
'xform': expected['xform'],
'instance': expected['instance'],
'mimetype': expected['mimetype'],
'filename': expected['filename']
}

instance_response = self.client.get(
reverse(
'data-detail',
kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
),
format='json',
)
self.assertEqual(instance_response.data['_attachments'][0], expected)
self.assertEqual(len(instance_response.data['_attachments']), 1)

def test_storage_counters_still_accurate_on_hard_delete(self):
"""
This test is not an API test, not really an Attachment unit test.
It is there to simplify the code base for attachment replacement.


"""
self.test_update_attachment_on_edit()
self.xform.refresh_from_db()

instance = self.xform.instances.first()
user_profile = UserProfile.objects.get(user=self.xform.user)
self.assertEqual(self.xform.instances.count(), 1)
self.assertNotEqual(self.xform.attachment_storage_bytes, 0)
self.assertNotEqual(user_profile.attachment_storage_bytes, 0)

total_size = sum(
[
a.media_file_size
for a in Attachment.all_objects.filter(instance=instance)
]
)

self.assertGreater(total_size, self.xform.attachment_storage_bytes)

# When deleting a submission, it (hard) deletes all attachments related
# to it, even the soft-deleted one.
self.client.delete(
reverse(
'data-detail',
kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
),
format='json',
)
# Only not soft-deleted attachments should have been subtracted,
# and counters should be equal to 0
self.assertEqual(self.xform.instances.count(), 0)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, 0)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, 0)
self.assertFalse(
Attachment.all_objects.filter(instance=instance).exists()
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

from django.core.management.base import BaseCommand

from onadata.apps.logger.models import (
Attachment,
Instance,
)
from onadata.apps.logger.signals import pre_delete_attachment
from onadata.libs.utils.logger_tools import get_soft_deleted_attachments


class Command(BaseCommand):

help = "Soft delete orphan attachments, i.e: Hide them in API responses"

def add_arguments(self, parser):
parser.add_argument(
'--chunks',
type=int,
default=2000,
help="Number of records to process per query"
)

def handle(self, *args, **kwargs):
chunks = kwargs['chunks']
verbosity = kwargs['verbosity']

queryset = Attachment.objects.values_list('instance_id', flat=True).distinct()

if verbosity > 1:
self.stdout.write(
f'Calculating number of instance with attachments…'
)
instances_count = queryset.count()

cpt = 1

for instance_id in queryset.iterator(chunk_size=chunks):
instance = Instance.objects.get(pk=instance_id)
if verbosity > 0:
message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
self.stdout.write(
f'Processing Instance object #{instance_id}{message}…'
)
soft_deleted_attachments = get_soft_deleted_attachments(instance)
for soft_deleted_attachment in soft_deleted_attachments:
pre_delete_attachment(
soft_deleted_attachment, only_update_counters=True
)

cpt += 1
self.stdout.write('Done!')
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def handle(self, *args, **kwargs):
)
# aggregate total media file size for all media per xform
form_attachments = Attachment.objects.filter(
instance__xform_id=xform['pk']
instance__xform_id=xform['pk'],
).aggregate(total=Sum('media_file_size'))

if form_attachments['total']:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 3.2.15 on 2023-10-16 17:25

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('logger', '0032_alter_daily_submission_counter_user'),
]

operations = [
migrations.AddField(
model_name='attachment',
name='deleted_at',
field=models.DateTimeField(blank=True, db_index=True, null=True),
),
]
10 changes: 10 additions & 0 deletions onadata/apps/logger/models/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def hash_attachment_contents(contents):
return get_hash(contents)


class AttachmentDefaultManager(models.Manager):

def get_queryset(self):
return super().get_queryset().filter(deleted_at__isnull=True)


class Attachment(models.Model):
instance = models.ForeignKey(
Instance, related_name='attachments', on_delete=models.CASCADE
Expand All @@ -40,6 +46,10 @@ class Attachment(models.Model):
media_file_size = models.PositiveIntegerField(blank=True, null=True)
mimetype = models.CharField(
max_length=100, null=False, blank=True, default='')
deleted_at = models.DateTimeField(blank=True, null=True, db_index=True)

objects = AttachmentDefaultManager()
all_objects = models.Manager()

class Meta:
app_label = 'logger'
Expand Down
2 changes: 1 addition & 1 deletion onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
def get_id_string_from_xml_str(xml_str):
xml_obj = clean_and_parse_xml(xml_str)
root_node = xml_obj.documentElement
id_string = root_node.getAttribute("id")
id_string = root_node.getAttribute('id')

if len(id_string) == 0:
# may be hidden in submission/data/id_string
Expand Down
2 changes: 1 addition & 1 deletion onadata/apps/logger/models/xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def url(self):
def data_dictionary(self):
from onadata.apps.viewer.models.data_dictionary import\
DataDictionary
return DataDictionary.objects.get(pk=self.pk)
return DataDictionary.all_objects.get(pk=self.pk)

@property
def has_instances_with_geopoints(self):
Expand Down
6 changes: 3 additions & 3 deletions onadata/apps/logger/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def pre_delete_attachment(instance, **kwargs):
# change the name of the parameter
attachment = instance
file_size = attachment.media_file_size

only_update_counters = kwargs.pop('only_update_counters', False)
xform = attachment.instance.xform

if file_size:
if file_size and attachment.deleted_at is None:
with transaction.atomic():
"""
Update both counters at the same time (in a transaction) to avoid
Expand All @@ -49,7 +49,7 @@ def pre_delete_attachment(instance, **kwargs):
attachment_storage_bytes=F('attachment_storage_bytes') - file_size
)

if not (media_file_name := str(attachment.media_file)):
if only_update_counters or not (media_file_name := str(attachment.media_file)):
return

# Clean-up storage
Expand Down
Loading