diff --git a/onadata/apps/api/tests/viewsets/test_abstract_viewset.py b/onadata/apps/api/tests/viewsets/test_abstract_viewset.py index 64628318f..eca65dc9b 100644 --- a/onadata/apps/api/tests/viewsets/test_abstract_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_abstract_viewset.py @@ -60,11 +60,11 @@ def publish_xls_form( 'owner': self.user.username, 'public': False, 'public_data': False, - 'description': u'transportation_2011_07_25', + 'description': 'transportation_2011_07_25', 'downloadable': True, 'encrypted': False, - 'id_string': u'transportation_2011_07_25', - 'title': u'transportation_2011_07_25', + 'id_string': 'transportation_2011_07_25', + 'title': 'transportation_2011_07_25', } if not path: @@ -192,8 +192,9 @@ def _make_submission( forced_submission_time=None, auth=None, media_file=None, + use_service_account=False, ): - if auth is None: + if auth is None and not use_service_account: auth = DigestAuth( self.profile_data['username'], self.profile_data['password1'] ) @@ -205,6 +206,7 @@ def _make_submission( forced_submission_time, auth, media_file, + use_service_account, ) def _make_submissions(self, username=None): diff --git a/onadata/apps/api/tests/viewsets/test_attachment_viewset.py b/onadata/apps/api/tests/viewsets/test_attachment_viewset.py index bd31c0890..61e32d099 100644 --- a/onadata/apps/api/tests/viewsets/test_attachment_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_attachment_viewset.py @@ -5,10 +5,13 @@ from django.conf import settings from kobo_service_account.utils import get_request_headers from rest_framework import status +from rest_framework.reverse import reverse -from onadata.apps.api.tests.viewsets.test_abstract_viewset import \ +from onadata.apps.api.tests.viewsets.test_abstract_viewset import ( TestAbstractViewSet +) from onadata.apps.api.viewsets.attachment_viewset import AttachmentViewSet +from onadata.apps.logger.models.attachment import Attachment from onadata.apps.main.models import UserProfile @@ -249,3 +252,197 @@ def test_attachment_storage_bytes_create_instance_signal(self): self.assertEqual(xform.attachment_storage_bytes, media_file_size) user_profile.refresh_from_db() self.assertEqual(user_profile.attachment_storage_bytes, media_file_size) + + def test_update_attachment_on_edit(self): + data = { + 'owner': self.user.username, + 'public': False, + 'public_data': False, + 'description': 'transportation_with_attachment', + 'downloadable': True, + 'encrypted': False, + 'id_string': 'transportation_with_attachment', + 'title': 'transportation_with_attachment', + } + + path = os.path.join( + settings.ONADATA_DIR, + 'apps', + 'main', + 'tests', + 'fixtures', + 'transportation', + 'transportation_with_attachment.xls', + ) + self.publish_xls_form(data=data, path=path) + + xml_path = os.path.join( + self.main_directory, + 'fixtures', + 'transportation', + 'instances', + 'transport_with_attachment', + 'transport_with_attachment.xml', + ) + media_file_path = os.path.join( + self.main_directory, + 'fixtures', + 'transportation', + 'instances', + 'transport_with_attachment', + '1335783522563.jpg' + ) + user_profile = UserProfile.objects.get(user=self.xform.user) + # Submit the same XML again, but this time include the attachment + with open(media_file_path, 'rb') as media_file: + self._make_submission(xml_path, media_file=media_file) + submission_uuid = self.xform.instances.first().uuid + self.assertEqual(self.xform.instances.count(), 1) + self.assertEqual(self.xform.instances.first().uuid, submission_uuid) + media_file_size = os.path.getsize(media_file_path) + self.xform.refresh_from_db() + self.assertEqual(self.xform.attachment_storage_bytes, media_file_size) + user_profile.refresh_from_db() + self.assertEqual(user_profile.attachment_storage_bytes, media_file_size) + + xml_path = os.path.join( + self.main_directory, + 'fixtures', + 'transportation', + 'instances', + 'transport_with_attachment', + 'transport_with_attachment_edit.xml', + ) + media_file_path = os.path.join( + self.main_directory, + 'fixtures', + 'transportation', + 'instances', + 'transport_with_attachment', + 'IMG_2235.JPG' + ) + # Edit are only allowed with service account + with open(media_file_path, 'rb') as media_file: + self._make_submission( + xml_path, + media_file=media_file, + auth=False, + use_service_account=True, + ) + + # Validate counters are up-to-date and instances count is still one. + self.assertEqual(self.xform.instances.count(), 1) + new_media_file_size = os.path.getsize(media_file_path) + self.xform.refresh_from_db() + self.assertEqual(self.xform.attachment_storage_bytes, new_media_file_size) + user_profile.refresh_from_db() + self.assertEqual(user_profile.attachment_storage_bytes, new_media_file_size) + self.assertNotEqual(new_media_file_size, media_file_size) + + instance = self.xform.instances.first() + attachment = instance.attachments.first() + + # Validate previous attachment has been replaced but file still exists + soft_deleted_attachment_qs = Attachment.all_objects.filter( + instance=instance, + deleted_at__isnull=False + ) + self.assertEqual(soft_deleted_attachment_qs.count(), 1) + soft_deleted_attachment = soft_deleted_attachment_qs.first() + self.assertEqual( + soft_deleted_attachment.media_file_basename, '1335783522563.jpg' + ) + self.assertTrue( + soft_deleted_attachment.media_file.storage.exists( + str(soft_deleted_attachment.media_file) + ) + ) + + # Validate that /api/v1/media endpoint returns the correct list + expected = { + 'url': f'http://testserver/api/v1/media/{attachment.pk}', + 'field_xpath': None, + 'download_url': attachment.secure_url(), + 'small_download_url': attachment.secure_url('small'), + 'medium_download_url': attachment.secure_url('medium'), + 'large_download_url': attachment.secure_url('large'), + 'id': attachment.pk, + 'xform': self.xform.pk, + 'instance': instance.pk, + 'mimetype': attachment.mimetype, + 'filename': attachment.media_file.name + } + request = self.factory.get('/', **self.extra) + response = self.list_view(request, pk=attachment.pk) + self.assertEqual(response.status_code, 200) + self.assertTrue(response.data[0], expected) + self.assertTrue(len(response.data), 1) + + # Validate that /api/v1/data endpoint returns the correct attachment list + # Unfortunately, attachments list differ from /api/v1/media + expected = { + 'download_url': expected['download_url'], + 'download_small_url': expected['small_download_url'], + 'download_medium_url': expected['medium_download_url'], + 'download_large_url': expected['large_download_url'], + 'id': expected['id'], + 'xform': expected['xform'], + 'instance': expected['instance'], + 'mimetype': expected['mimetype'], + 'filename': expected['filename'] + } + + instance_response = self.client.get( + reverse( + 'data-detail', + kwargs={'pk': self.xform.pk, 'dataid': instance.pk}, + ), + format='json', + ) + self.assertEqual(instance_response.data['_attachments'][0], expected) + self.assertEqual(len(instance_response.data['_attachments']), 1) + + def test_storage_counters_still_accurate_on_hard_delete(self): + """ + This test is not an API test, not really an Attachment unit test. + It is there to simplify the code base for attachment replacement. + + + """ + self.test_update_attachment_on_edit() + self.xform.refresh_from_db() + + instance = self.xform.instances.first() + user_profile = UserProfile.objects.get(user=self.xform.user) + self.assertEqual(self.xform.instances.count(), 1) + self.assertNotEqual(self.xform.attachment_storage_bytes, 0) + self.assertNotEqual(user_profile.attachment_storage_bytes, 0) + + total_size = sum( + [ + a.media_file_size + for a in Attachment.all_objects.filter(instance=instance) + ] + ) + + self.assertGreater(total_size, self.xform.attachment_storage_bytes) + + # When deleting a submission, it (hard) deletes all attachments related + # to it, even the soft-deleted one. + self.client.delete( + reverse( + 'data-detail', + kwargs={'pk': self.xform.pk, 'dataid': instance.pk}, + ), + format='json', + ) + # Only not soft-deleted attachments should have been subtracted, + # and counters should be equal to 0 + self.assertEqual(self.xform.instances.count(), 0) + self.xform.refresh_from_db() + self.assertEqual(self.xform.attachment_storage_bytes, 0) + user_profile.refresh_from_db() + self.assertEqual(user_profile.attachment_storage_bytes, 0) + self.assertFalse( + Attachment.all_objects.filter(instance=instance).exists() + ) diff --git a/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py b/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py new file mode 100644 index 000000000..586bacfe0 --- /dev/null +++ b/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from django.core.management.base import BaseCommand + +from onadata.apps.logger.models import ( + Attachment, + Instance, +) +from onadata.apps.logger.signals import pre_delete_attachment +from onadata.libs.utils.logger_tools import get_soft_deleted_attachments + + +class Command(BaseCommand): + + help = "Soft delete orphan attachments, i.e: Hide them in API responses" + + def add_arguments(self, parser): + parser.add_argument( + '--chunks', + type=int, + default=2000, + help="Number of records to process per query" + ) + + def handle(self, *args, **kwargs): + chunks = kwargs['chunks'] + verbosity = kwargs['verbosity'] + + queryset = Attachment.objects.values_list('instance_id', flat=True).distinct() + + if verbosity > 1: + self.stdout.write( + f'Calculating number of instance with attachments…' + ) + instances_count = queryset.count() + + cpt = 1 + + for instance_id in queryset.iterator(chunk_size=chunks): + instance = Instance.objects.get(pk=instance_id) + if verbosity > 0: + message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}' + self.stdout.write( + f'Processing Instance object #{instance_id}{message}…' + ) + soft_deleted_attachments = get_soft_deleted_attachments(instance) + for soft_deleted_attachment in soft_deleted_attachments: + pre_delete_attachment( + soft_deleted_attachment, only_update_counters=True + ) + + cpt += 1 + self.stdout.write('Done!') diff --git a/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py b/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py index 1718e88a3..ad7ff24bb 100644 --- a/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py +++ b/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py @@ -125,7 +125,7 @@ def handle(self, *args, **kwargs): ) # aggregate total media file size for all media per xform form_attachments = Attachment.objects.filter( - instance__xform_id=xform['pk'] + instance__xform_id=xform['pk'], ).aggregate(total=Sum('media_file_size')) if form_attachments['total']: diff --git a/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py b/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py new file mode 100644 index 000000000..d2addc60a --- /dev/null +++ b/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py @@ -0,0 +1,20 @@ +# Generated by Django 3.2.15 on 2023-10-16 17:25 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0032_alter_daily_submission_counter_user'), + ] + + operations = [ + migrations.AddField( + model_name='attachment', + name='deleted_at', + field=models.DateTimeField(blank=True, db_index=True, null=True), + ), + ] diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py index 4a50d3c4f..e6dcf2852 100644 --- a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -28,6 +28,12 @@ def hash_attachment_contents(contents): return get_hash(contents) +class AttachmentDefaultManager(models.Manager): + + def get_queryset(self): + return super().get_queryset().filter(deleted_at__isnull=True) + + class Attachment(models.Model): instance = models.ForeignKey( Instance, related_name='attachments', on_delete=models.CASCADE @@ -40,6 +46,10 @@ class Attachment(models.Model): media_file_size = models.PositiveIntegerField(blank=True, null=True) mimetype = models.CharField( max_length=100, null=False, blank=True, default='') + deleted_at = models.DateTimeField(blank=True, null=True, db_index=True) + + objects = AttachmentDefaultManager() + all_objects = models.Manager() class Meta: app_label = 'logger' diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 19437d838..1ac617449 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -54,7 +54,7 @@ def get_id_string_from_xml_str(xml_str): xml_obj = clean_and_parse_xml(xml_str) root_node = xml_obj.documentElement - id_string = root_node.getAttribute("id") + id_string = root_node.getAttribute('id') if len(id_string) == 0: # may be hidden in submission/data/id_string diff --git a/onadata/apps/logger/models/xform.py b/onadata/apps/logger/models/xform.py index 2d075936a..146a31e9a 100644 --- a/onadata/apps/logger/models/xform.py +++ b/onadata/apps/logger/models/xform.py @@ -132,7 +132,7 @@ def url(self): def data_dictionary(self): from onadata.apps.viewer.models.data_dictionary import\ DataDictionary - return DataDictionary.objects.get(pk=self.pk) + return DataDictionary.all_objects.get(pk=self.pk) @property def has_instances_with_geopoints(self): diff --git a/onadata/apps/logger/signals.py b/onadata/apps/logger/signals.py index 263e6cf62..e8941710f 100644 --- a/onadata/apps/logger/signals.py +++ b/onadata/apps/logger/signals.py @@ -31,10 +31,10 @@ def pre_delete_attachment(instance, **kwargs): # change the name of the parameter attachment = instance file_size = attachment.media_file_size - + only_update_counters = kwargs.pop('only_update_counters', False) xform = attachment.instance.xform - if file_size: + if file_size and attachment.deleted_at is None: with transaction.atomic(): """ Update both counters at the same time (in a transaction) to avoid @@ -49,7 +49,7 @@ def pre_delete_attachment(instance, **kwargs): attachment_storage_bytes=F('attachment_storage_bytes') - file_size ) - if not (media_file_name := str(attachment.media_file)): + if only_update_counters or not (media_file_name := str(attachment.media_file)): return # Clean-up storage diff --git a/onadata/apps/logger/xform_instance_parser.py b/onadata/apps/logger/xform_instance_parser.py index 45da8997f..cea69d0ba 100644 --- a/onadata/apps/logger/xform_instance_parser.py +++ b/onadata/apps/logger/xform_instance_parser.py @@ -119,20 +119,22 @@ def get_deprecated_uuid_from_xml(xml): return None -def clean_and_parse_xml(xml_string): +def clean_and_parse_xml(xml_string: str) -> Node: clean_xml_str = xml_string.strip() - clean_xml_str = re.sub(r">\s+<", "><", smart_str(clean_xml_str)) + clean_xml_str = re.sub(r'>\s+<', '><', smart_str(clean_xml_str)) xml_obj = minidom.parseString(clean_xml_str) return xml_obj -def _xml_node_to_dict(node, repeats=[]): +def _xml_node_to_dict(node: Node, repeats: list = []) -> dict: assert isinstance(node, Node) if len(node.childNodes) == 0: # there's no data for this leaf node return None - elif len(node.childNodes) == 1 and \ - node.childNodes[0].nodeType == node.TEXT_NODE: + elif ( + len(node.childNodes) == 1 + and node.childNodes[0].nodeType == node.TEXT_NODE + ): # there is data for this leaf node return {node.nodeName: node.childNodes[0].nodeValue} else: @@ -329,9 +331,10 @@ def _set_attributes(self): try: assert key not in self._attributes except AssertionError: - logger = logging.getLogger("console_logger") - logger.debug("Skipping duplicate attribute: %s" - " with value %s" % (key, value)) + logger = logging.getLogger('console_logger') + #logger.debug( + # f'Skipping duplicate attribute: {key} with value {value}' + #) else: self._attributes[key] = value @@ -357,3 +360,44 @@ def xform_instance_to_flat_dict(xml_str, data_dictionary): def parse_xform_instance(xml_str, data_dictionary): parser = XFormInstanceParser(xml_str, data_dictionary) return parser.get_flat_dict_with_attributes() + + +def get_xform_media_question_xpaths( + xform: 'onadata.apps.logger.models.XForm', +) -> list: + logger = logging.getLogger('console_logger') + parser = XFormInstanceParser(xform.xml, xform.data_dictionary()) + all_attributes = _get_all_attributes(parser.get_root_node()) + media_field_xpaths = [] + + # This code expects that the attributes from Enketo Express are **always** + # sent in the same order. + # For example: + # + # `ref` attribute should always come right after `mediatype` + for (key, value) in all_attributes: + if key.lower() == 'mediatype': + try: + next_attribute = next(all_attributes) + except StopIteration: + logger.error( + f'`ref` attribute seems to be missing in {xform.xml}', + exc_info=True, + ) + continue + + next_attribute_key, next_attribute_value = next_attribute + try: + assert next_attribute_key.lower() == 'ref' + except AssertionError: + logger = logging.getLogger('console_logger') + logger.error( + f'`ref` should come after `mediatype:{value}` in {xform.xml}', + exc_info=True, + ) + continue + + # We are returning XPaths, leading slash should be removed + media_field_xpaths.append(next_attribute_value[1:]) + + return media_field_xpaths diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg new file mode 100755 index 000000000..e8d953e38 Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg differ diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG new file mode 100644 index 000000000..ef7476d31 Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG differ diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml new file mode 100755 index 000000000..daa901514 --- /dev/null +++ b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml @@ -0,0 +1,23 @@ + + + + none + + + + + + + + + + + + + + + 1335783522563.jpg + + uuid:5b2cc313-fc09-437e-8149-fcd32f695d41 + + diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml new file mode 100755 index 000000000..d914a12f0 --- /dev/null +++ b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml @@ -0,0 +1,24 @@ + + + + none + + + + + + + + + + + + + + + IMG_2235.JPG + + uuid:30362a89-73f2-4b64-8838-867c2bd3ea35 + uuid:5b2cc313-fc09-437e-8149-fcd32f695d41 + + diff --git a/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls b/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls new file mode 100644 index 000000000..b44a43efe Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls differ diff --git a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls index feb40833c..7afb20942 100644 Binary files a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls and b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls differ diff --git a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml index 57b192afa..2c72706c8 100644 --- a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml +++ b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml @@ -1 +1,16 @@ -b891c5f8bf4f4e3bb1bd540a858c7c88Office4-1.2624975 36.7923384 0.0 25.0Cooluuid:a32f232c-77cb-4468-b55b-6495d5e5de7 + + + + b891c5f8bf4f4e3bb1bd540a858c7c88 + + Office + 4 + + -1.2624975 36.7923384 0.0 25.0 + + Cool + + + uuid:a32f232c-77cb-4468-b55b-6495d5e5de7 + + diff --git a/onadata/apps/main/tests/test_csv_export.py b/onadata/apps/main/tests/test_csv_export.py index 49a11e63b..5f0fb09d6 100644 --- a/onadata/apps/main/tests/test_csv_export.py +++ b/onadata/apps/main/tests/test_csv_export.py @@ -68,22 +68,41 @@ def test_csv_nested_repeat_output(self): self.assertEqual(actual_content, expected_content) def test_dotted_fields_csv_export_output(self): - path = os.path.join(os.path.dirname(__file__), 'fixtures', 'userone', - 'userone_with_dot_name_fields.xls') + path = os.path.join( + os.path.dirname(__file__), + 'fixtures', + 'userone', + 'userone_with_dot_name_fields.xls', + ) self._publish_xls_file_and_set_xform(path) - path = os.path.join(os.path.dirname(__file__), 'fixtures', 'userone', - 'userone_with_dot_name_fields.xml') + path = os.path.join( + os.path.dirname(__file__), + 'fixtures', + 'userone', + 'userone_with_dot_name_fields.xml', + ) self._make_submission( - path, forced_submission_time=self._submission_time) + path, forced_submission_time=self._submission_time + ) # test csv - export = generate_export(Export.CSV_EXPORT, 'csv', self.user.username, - 'userone') + export = generate_export( + Export.CSV_EXPORT, + 'csv', + self.user.username, + 'userone_with_dot_name_fields', + ) self.assertTrue(default_storage.exists(export.filepath)) path, ext = os.path.splitext(export.filename) self.assertEqual(ext, '.csv') - with open(os.path.join( - os.path.dirname(__file__), 'fixtures', 'userone', - 'userone_with_dot_name_fields.csv'), 'rb') as f1: + with open( + os.path.join( + os.path.dirname(__file__), + 'fixtures', + 'userone', + 'userone_with_dot_name_fields.csv', + ), + 'rb', + ) as f1: with default_storage.open(export.filepath) as f2: expected_content = f1.read() actual_content = f2.read() diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 1047a6712..c04eb237d 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -270,6 +270,10 @@ def attachment_url(request, size='medium'): media_file_logger.info('attachment not found') return HttpResponseNotFound(t('Attachment not found')) + # Attachment has a deleted date, it should not be shown anymore + if attachment.deleted_at: + return HttpResponseNotFound(_('Attachment not found')) + # Checks whether users are allowed to see the media file before giving them # the url xform = attachment.instance.xform diff --git a/onadata/libs/serializers/attachment_serializer.py b/onadata/libs/serializers/attachment_serializer.py index 718f54b5b..1c4752912 100644 --- a/onadata/libs/serializers/attachment_serializer.py +++ b/onadata/libs/serializers/attachment_serializer.py @@ -1,9 +1,9 @@ # coding: utf-8 +import json + from rest_framework import serializers from onadata.apps.logger.models.attachment import Attachment from onadata.libs.utils.decorators import check_obj -from onadata.libs.utils.image_tools import image_url -import json def dict_key_for_value(_dict, value): @@ -13,7 +13,10 @@ def dict_key_for_value(_dict, value): return list(_dict.keys())[list(_dict.values()).index(value)] -def get_path(data, question_name, path_list=[]): +def get_path(data, question_name, path_list=None): + if path_list is None: + path_list = [] + name = data.get('name') if name == question_name: return '/'.join(path_list) diff --git a/onadata/libs/tests/mixins/make_submission_mixin.py b/onadata/libs/tests/mixins/make_submission_mixin.py index 55be147dd..269578d48 100644 --- a/onadata/libs/tests/mixins/make_submission_mixin.py +++ b/onadata/libs/tests/mixins/make_submission_mixin.py @@ -4,8 +4,10 @@ from tempfile import NamedTemporaryFile from typing import Union +from django.conf import settings from django.contrib.auth import authenticate from django_digest.test import DigestAuth +from kobo_service_account.utils import get_request_headers from rest_framework.test import APIRequestFactory from onadata.apps.api.viewsets.xform_submission_api import XFormSubmissionApi @@ -43,6 +45,7 @@ def _make_submission( forced_submission_time: bool = None, auth: Union[DigestAuth, bool] = None, media_file: 'io.BufferedReader' = None, + use_service_account: bool = False, ): """ Pass `auth=False` for an anonymous request, or omit `auth` to perform @@ -50,9 +53,17 @@ def _make_submission( """ # store temporary file with dynamic uuid self.factory = APIRequestFactory() - if auth is None: + + if auth is None and not use_service_account: auth = DigestAuth('bob', 'bob') + extras = {} + if use_service_account: + extras = self.get_meta_from_headers( + get_request_headers(self.user.username) + ) + extras['HTTP_HOST'] = settings.TEST_HTTP_HOST + if add_uuid: path = self._add_uuid_to_submission_xml(path, self.xform) @@ -67,7 +78,7 @@ def _make_submission( url_prefix = f'{username}/' if username else '' url = f'/{url_prefix}submission' - request = self.factory.post(url, post_data) + request = self.factory.post(url, post_data, **extras) if auth: request.user = authenticate(username=auth.username, password=auth.password) diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 783b053b3..ba915b150 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import annotations +import logging import os import re import sys import traceback from datetime import date, datetime +from xml.etree import ElementTree as ET from xml.parsers.expat import ExpatError try: from zoneinfo import ZoneInfo @@ -55,7 +57,11 @@ update_xform_submission_count, ) from onadata.apps.logger.models.xform import XLSFormError -from onadata.apps.logger.signals import post_save_attachment +from onadata.apps.logger.signals import ( + post_save_attachment, + pre_delete_attachment, +) + from onadata.apps.logger.xform_instance_parser import ( InstanceEmptyError, InstanceInvalidUserError, @@ -65,6 +71,7 @@ get_uuid_from_xml, get_deprecated_uuid_from_xml, get_submission_date_from_xml, + get_xform_media_question_xpaths, ) from onadata.apps.main.models import UserProfile from onadata.apps.viewer.models.data_dictionary import DataDictionary @@ -180,7 +187,7 @@ def create_instance( if existing_instance: existing_instance.check_active(force=False) # ensure we have saved the extra attachments - new_attachments = save_attachments(existing_instance, media_files) + new_attachments, _ = save_attachments(existing_instance, media_files) if not new_attachments: raise DuplicateInstance() else: @@ -261,8 +268,9 @@ def get_xform_from_submission(xml, username, uuid=None): id_string = get_id_string_from_xml_str(xml) - return get_object_or_404(XForm, id_string__exact=id_string, - user__username=username) + return get_object_or_404( + XForm, id_string__exact=id_string, user__username=username + ) def inject_instanceid(xml_str, uuid): @@ -564,10 +572,11 @@ def save_attachments( instance: Instance, media_files: list['django.core.files.uploadedfile.UploadedFile'], defer_counting: bool = False, -) -> list[Attachment]: +) -> tuple[list[Attachment], list[Attachment]]: """ - Returns `True` if any new attachment was saved, `False` if all attachments - were duplicates or none were provided. + Return a tuple of two lists. + - The former is new attachments + - The latter is the replaced/soft-deleted attachments `defer_counting=False` will set a Python-only attribute of the same name on any *new* `Attachment` instances created. This will prevent @@ -598,7 +607,9 @@ def save_attachments( new_attachment.save() new_attachments.append(new_attachment) - return new_attachments + soft_deleted_attachments = get_soft_deleted_attachments(instance) + + return new_attachments, soft_deleted_attachments def save_submission( @@ -628,10 +639,11 @@ def save_submission( # attribute set to `True` *if* a new instance was created. We are # responsible for calling `update_xform_submission_count()` if the returned # `Instance` has `defer_counting = True`. - instance = _get_instance(request, xml, new_uuid, status, xform, - defer_counting=True) + instance = _get_instance( + request, xml, new_uuid, status, xform, defer_counting=True + ) - new_attachments = save_attachments( + new_attachments, soft_deleted_attachments = save_attachments( instance, media_files, defer_counting=True ) @@ -675,9 +687,61 @@ def save_submission( del new_attachment.defer_counting post_save_attachment(new_attachment, created=True) + for soft_deleted_attachment in soft_deleted_attachments: + pre_delete_attachment(soft_deleted_attachment, only_update_counters=True) + return instance +def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]: + """ + Soft delete replaced attachments when editing a submission + """ + # Retrieve all media questions of Xform + media_question_xpaths = get_xform_media_question_xpaths(instance.xform) + + # If XForm does not have any media fields, do not go further + if not media_question_xpaths: + return [] + + # Parse instance XML to get the basename of each file of the updated + # submission + xml_parsed = ET.fromstring(instance.xml) + basenames = [] + + for media_question_xpath in media_question_xpaths: + root_name, xpath_without_root = media_question_xpath.split('/', 1) + try: + assert root_name == xml_parsed.tag + except AssertionError: + logging.warning( + 'Instance XML root tag name does not match with its form' + ) + + # With repeat groups, several nodes can have the same XPath. We + # need to retrieve all of them + questions = xml_parsed.findall(xpath_without_root) + for question in questions: + try: + basename = question.text + except AttributeError: + raise XPathNotFoundException + + # Only keep non-empty fields + if basename: + basenames.append(basename) + + # Update Attachment objects to hide them if they are not used anymore. + # We do not want to delete them until the instance itself is deleted. + queryset = Attachment.objects.filter( + instance=instance + ).exclude(media_file_basename__in=basenames) + soft_deleted_attachments = list(queryset.all()) + queryset.update(deleted_at=timezone.now()) + + return soft_deleted_attachments + + def _get_instance( request: 'rest_framework.request.Request', xml: str, @@ -857,3 +921,7 @@ class UnauthenticatedEditAttempt(Exception): then returns the appropriate 401 response. """ pass + + +class XPathNotFoundException(Exception): + pass