Skip to content

Commit

Permalink
Merge pull request #792 from kobotoolbox/update-submission-attachment…
Browse files Browse the repository at this point in the history
…s-on-edit

[ TASK-163 ] Keep attachments list up-to-date when editing a submission
  • Loading branch information
LMNTL authored Oct 17, 2023
2 parents 42f9545 + a33bd2a commit ca44143
Show file tree
Hide file tree
Showing 22 changed files with 539 additions and 46 deletions.
10 changes: 6 additions & 4 deletions onadata/apps/api/tests/viewsets/test_abstract_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def publish_xls_form(
'owner': self.user.username,
'public': False,
'public_data': False,
'description': u'transportation_2011_07_25',
'description': 'transportation_2011_07_25',
'downloadable': True,
'encrypted': False,
'id_string': u'transportation_2011_07_25',
'title': u'transportation_2011_07_25',
'id_string': 'transportation_2011_07_25',
'title': 'transportation_2011_07_25',
}

if not path:
Expand Down Expand Up @@ -192,8 +192,9 @@ def _make_submission(
forced_submission_time=None,
auth=None,
media_file=None,
use_service_account=False,
):
if auth is None:
if auth is None and not use_service_account:
auth = DigestAuth(
self.profile_data['username'], self.profile_data['password1']
)
Expand All @@ -205,6 +206,7 @@ def _make_submission(
forced_submission_time,
auth,
media_file,
use_service_account,
)

def _make_submissions(self, username=None):
Expand Down
199 changes: 198 additions & 1 deletion onadata/apps/api/tests/viewsets/test_attachment_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
from django.conf import settings
from kobo_service_account.utils import get_request_headers
from rest_framework import status
from rest_framework.reverse import reverse

from onadata.apps.api.tests.viewsets.test_abstract_viewset import \
from onadata.apps.api.tests.viewsets.test_abstract_viewset import (
TestAbstractViewSet
)
from onadata.apps.api.viewsets.attachment_viewset import AttachmentViewSet
from onadata.apps.logger.models.attachment import Attachment
from onadata.apps.main.models import UserProfile


Expand Down Expand Up @@ -249,3 +252,197 @@ def test_attachment_storage_bytes_create_instance_signal(self):
self.assertEqual(xform.attachment_storage_bytes, media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)

def test_update_attachment_on_edit(self):
data = {
'owner': self.user.username,
'public': False,
'public_data': False,
'description': 'transportation_with_attachment',
'downloadable': True,
'encrypted': False,
'id_string': 'transportation_with_attachment',
'title': 'transportation_with_attachment',
}

path = os.path.join(
settings.ONADATA_DIR,
'apps',
'main',
'tests',
'fixtures',
'transportation',
'transportation_with_attachment.xls',
)
self.publish_xls_form(data=data, path=path)

xml_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'transport_with_attachment.xml',
)
media_file_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'1335783522563.jpg'
)
user_profile = UserProfile.objects.get(user=self.xform.user)
# Submit the same XML again, but this time include the attachment
with open(media_file_path, 'rb') as media_file:
self._make_submission(xml_path, media_file=media_file)
submission_uuid = self.xform.instances.first().uuid
self.assertEqual(self.xform.instances.count(), 1)
self.assertEqual(self.xform.instances.first().uuid, submission_uuid)
media_file_size = os.path.getsize(media_file_path)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)

xml_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'transport_with_attachment_edit.xml',
)
media_file_path = os.path.join(
self.main_directory,
'fixtures',
'transportation',
'instances',
'transport_with_attachment',
'IMG_2235.JPG'
)
# Edit are only allowed with service account
with open(media_file_path, 'rb') as media_file:
self._make_submission(
xml_path,
media_file=media_file,
auth=False,
use_service_account=True,
)

# Validate counters are up-to-date and instances count is still one.
self.assertEqual(self.xform.instances.count(), 1)
new_media_file_size = os.path.getsize(media_file_path)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, new_media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, new_media_file_size)
self.assertNotEqual(new_media_file_size, media_file_size)

instance = self.xform.instances.first()
attachment = instance.attachments.first()

# Validate previous attachment has been replaced but file still exists
soft_deleted_attachment_qs = Attachment.all_objects.filter(
instance=instance,
deleted_at__isnull=False
)
self.assertEqual(soft_deleted_attachment_qs.count(), 1)
soft_deleted_attachment = soft_deleted_attachment_qs.first()
self.assertEqual(
soft_deleted_attachment.media_file_basename, '1335783522563.jpg'
)
self.assertTrue(
soft_deleted_attachment.media_file.storage.exists(
str(soft_deleted_attachment.media_file)
)
)

# Validate that /api/v1/media endpoint returns the correct list
expected = {
'url': f'http://testserver/api/v1/media/{attachment.pk}',
'field_xpath': None,
'download_url': attachment.secure_url(),
'small_download_url': attachment.secure_url('small'),
'medium_download_url': attachment.secure_url('medium'),
'large_download_url': attachment.secure_url('large'),
'id': attachment.pk,
'xform': self.xform.pk,
'instance': instance.pk,
'mimetype': attachment.mimetype,
'filename': attachment.media_file.name
}
request = self.factory.get('/', **self.extra)
response = self.list_view(request, pk=attachment.pk)
self.assertEqual(response.status_code, 200)
self.assertTrue(response.data[0], expected)
self.assertTrue(len(response.data), 1)

# Validate that /api/v1/data endpoint returns the correct attachment list
# Unfortunately, attachments list differ from /api/v1/media
expected = {
'download_url': expected['download_url'],
'download_small_url': expected['small_download_url'],
'download_medium_url': expected['medium_download_url'],
'download_large_url': expected['large_download_url'],
'id': expected['id'],
'xform': expected['xform'],
'instance': expected['instance'],
'mimetype': expected['mimetype'],
'filename': expected['filename']
}

instance_response = self.client.get(
reverse(
'data-detail',
kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
),
format='json',
)
self.assertEqual(instance_response.data['_attachments'][0], expected)
self.assertEqual(len(instance_response.data['_attachments']), 1)

def test_storage_counters_still_accurate_on_hard_delete(self):
"""
This test is not an API test, not really an Attachment unit test.
It is there to simplify the code base for attachment replacement.
"""
self.test_update_attachment_on_edit()
self.xform.refresh_from_db()

instance = self.xform.instances.first()
user_profile = UserProfile.objects.get(user=self.xform.user)
self.assertEqual(self.xform.instances.count(), 1)
self.assertNotEqual(self.xform.attachment_storage_bytes, 0)
self.assertNotEqual(user_profile.attachment_storage_bytes, 0)

total_size = sum(
[
a.media_file_size
for a in Attachment.all_objects.filter(instance=instance)
]
)

self.assertGreater(total_size, self.xform.attachment_storage_bytes)

# When deleting a submission, it (hard) deletes all attachments related
# to it, even the soft-deleted one.
self.client.delete(
reverse(
'data-detail',
kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
),
format='json',
)
# Only not soft-deleted attachments should have been subtracted,
# and counters should be equal to 0
self.assertEqual(self.xform.instances.count(), 0)
self.xform.refresh_from_db()
self.assertEqual(self.xform.attachment_storage_bytes, 0)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, 0)
self.assertFalse(
Attachment.all_objects.filter(instance=instance).exists()
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

from django.core.management.base import BaseCommand

from onadata.apps.logger.models import (
Attachment,
Instance,
)
from onadata.apps.logger.signals import pre_delete_attachment
from onadata.libs.utils.logger_tools import get_soft_deleted_attachments


class Command(BaseCommand):

help = "Soft delete orphan attachments, i.e: Hide them in API responses"

def add_arguments(self, parser):
parser.add_argument(
'--chunks',
type=int,
default=2000,
help="Number of records to process per query"
)

def handle(self, *args, **kwargs):
chunks = kwargs['chunks']
verbosity = kwargs['verbosity']

queryset = Attachment.objects.values_list('instance_id', flat=True).distinct()

if verbosity > 1:
self.stdout.write(
f'Calculating number of instance with attachments…'
)
instances_count = queryset.count()

cpt = 1

for instance_id in queryset.iterator(chunk_size=chunks):
instance = Instance.objects.get(pk=instance_id)
if verbosity > 0:
message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
self.stdout.write(
f'Processing Instance object #{instance_id}{message}…'
)
soft_deleted_attachments = get_soft_deleted_attachments(instance)
for soft_deleted_attachment in soft_deleted_attachments:
pre_delete_attachment(
soft_deleted_attachment, only_update_counters=True
)

cpt += 1
self.stdout.write('Done!')
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def handle(self, *args, **kwargs):
)
# aggregate total media file size for all media per xform
form_attachments = Attachment.objects.filter(
instance__xform_id=xform['pk']
instance__xform_id=xform['pk'],
).aggregate(total=Sum('media_file_size'))

if form_attachments['total']:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 3.2.15 on 2023-10-16 17:25

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('logger', '0032_alter_daily_submission_counter_user'),
]

operations = [
migrations.AddField(
model_name='attachment',
name='deleted_at',
field=models.DateTimeField(blank=True, db_index=True, null=True),
),
]
10 changes: 10 additions & 0 deletions onadata/apps/logger/models/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def hash_attachment_contents(contents):
return get_hash(contents)


class AttachmentDefaultManager(models.Manager):

def get_queryset(self):
return super().get_queryset().filter(deleted_at__isnull=True)


class Attachment(models.Model):
instance = models.ForeignKey(
Instance, related_name='attachments', on_delete=models.CASCADE
Expand All @@ -40,6 +46,10 @@ class Attachment(models.Model):
media_file_size = models.PositiveIntegerField(blank=True, null=True)
mimetype = models.CharField(
max_length=100, null=False, blank=True, default='')
deleted_at = models.DateTimeField(blank=True, null=True, db_index=True)

objects = AttachmentDefaultManager()
all_objects = models.Manager()

class Meta:
app_label = 'logger'
Expand Down
2 changes: 1 addition & 1 deletion onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
def get_id_string_from_xml_str(xml_str):
xml_obj = clean_and_parse_xml(xml_str)
root_node = xml_obj.documentElement
id_string = root_node.getAttribute("id")
id_string = root_node.getAttribute('id')

if len(id_string) == 0:
# may be hidden in submission/data/id_string
Expand Down
2 changes: 1 addition & 1 deletion onadata/apps/logger/models/xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def url(self):
def data_dictionary(self):
from onadata.apps.viewer.models.data_dictionary import\
DataDictionary
return DataDictionary.objects.get(pk=self.pk)
return DataDictionary.all_objects.get(pk=self.pk)

@property
def has_instances_with_geopoints(self):
Expand Down
6 changes: 3 additions & 3 deletions onadata/apps/logger/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def pre_delete_attachment(instance, **kwargs):
# change the name of the parameter
attachment = instance
file_size = attachment.media_file_size

only_update_counters = kwargs.pop('only_update_counters', False)
xform = attachment.instance.xform

if file_size:
if file_size and attachment.deleted_at is None:
with transaction.atomic():
"""
Update both counters at the same time (in a transaction) to avoid
Expand All @@ -49,7 +49,7 @@ def pre_delete_attachment(instance, **kwargs):
attachment_storage_bytes=F('attachment_storage_bytes') - file_size
)

if not (media_file_name := str(attachment.media_file)):
if only_update_counters or not (media_file_name := str(attachment.media_file)):
return

# Clean-up storage
Expand Down
Loading

0 comments on commit ca44143

Please sign in to comment.