diff --git a/onadata/apps/api/tests/viewsets/test_abstract_viewset.py b/onadata/apps/api/tests/viewsets/test_abstract_viewset.py
index 64628318f..eca65dc9b 100644
--- a/onadata/apps/api/tests/viewsets/test_abstract_viewset.py
+++ b/onadata/apps/api/tests/viewsets/test_abstract_viewset.py
@@ -60,11 +60,11 @@ def publish_xls_form(
'owner': self.user.username,
'public': False,
'public_data': False,
- 'description': u'transportation_2011_07_25',
+ 'description': 'transportation_2011_07_25',
'downloadable': True,
'encrypted': False,
- 'id_string': u'transportation_2011_07_25',
- 'title': u'transportation_2011_07_25',
+ 'id_string': 'transportation_2011_07_25',
+ 'title': 'transportation_2011_07_25',
}
if not path:
@@ -192,8 +192,9 @@ def _make_submission(
forced_submission_time=None,
auth=None,
media_file=None,
+ use_service_account=False,
):
- if auth is None:
+ if auth is None and not use_service_account:
auth = DigestAuth(
self.profile_data['username'], self.profile_data['password1']
)
@@ -205,6 +206,7 @@ def _make_submission(
forced_submission_time,
auth,
media_file,
+ use_service_account,
)
def _make_submissions(self, username=None):
diff --git a/onadata/apps/api/tests/viewsets/test_attachment_viewset.py b/onadata/apps/api/tests/viewsets/test_attachment_viewset.py
index bd31c0890..61e32d099 100644
--- a/onadata/apps/api/tests/viewsets/test_attachment_viewset.py
+++ b/onadata/apps/api/tests/viewsets/test_attachment_viewset.py
@@ -5,10 +5,13 @@
from django.conf import settings
from kobo_service_account.utils import get_request_headers
from rest_framework import status
+from rest_framework.reverse import reverse
-from onadata.apps.api.tests.viewsets.test_abstract_viewset import \
+from onadata.apps.api.tests.viewsets.test_abstract_viewset import (
TestAbstractViewSet
+)
from onadata.apps.api.viewsets.attachment_viewset import AttachmentViewSet
+from onadata.apps.logger.models.attachment import Attachment
from onadata.apps.main.models import UserProfile
@@ -249,3 +252,197 @@ def test_attachment_storage_bytes_create_instance_signal(self):
self.assertEqual(xform.attachment_storage_bytes, media_file_size)
user_profile.refresh_from_db()
self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)
+
+ def test_update_attachment_on_edit(self):
+ data = {
+ 'owner': self.user.username,
+ 'public': False,
+ 'public_data': False,
+ 'description': 'transportation_with_attachment',
+ 'downloadable': True,
+ 'encrypted': False,
+ 'id_string': 'transportation_with_attachment',
+ 'title': 'transportation_with_attachment',
+ }
+
+ path = os.path.join(
+ settings.ONADATA_DIR,
+ 'apps',
+ 'main',
+ 'tests',
+ 'fixtures',
+ 'transportation',
+ 'transportation_with_attachment.xls',
+ )
+ self.publish_xls_form(data=data, path=path)
+
+ xml_path = os.path.join(
+ self.main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ 'transport_with_attachment',
+ 'transport_with_attachment.xml',
+ )
+ media_file_path = os.path.join(
+ self.main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ 'transport_with_attachment',
+ '1335783522563.jpg'
+ )
+ user_profile = UserProfile.objects.get(user=self.xform.user)
+ # Submit the same XML again, but this time include the attachment
+ with open(media_file_path, 'rb') as media_file:
+ self._make_submission(xml_path, media_file=media_file)
+ submission_uuid = self.xform.instances.first().uuid
+ self.assertEqual(self.xform.instances.count(), 1)
+ self.assertEqual(self.xform.instances.first().uuid, submission_uuid)
+ media_file_size = os.path.getsize(media_file_path)
+ self.xform.refresh_from_db()
+ self.assertEqual(self.xform.attachment_storage_bytes, media_file_size)
+ user_profile.refresh_from_db()
+ self.assertEqual(user_profile.attachment_storage_bytes, media_file_size)
+
+ xml_path = os.path.join(
+ self.main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ 'transport_with_attachment',
+ 'transport_with_attachment_edit.xml',
+ )
+ media_file_path = os.path.join(
+ self.main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ 'transport_with_attachment',
+ 'IMG_2235.JPG'
+ )
+ # Edit are only allowed with service account
+ with open(media_file_path, 'rb') as media_file:
+ self._make_submission(
+ xml_path,
+ media_file=media_file,
+ auth=False,
+ use_service_account=True,
+ )
+
+ # Validate counters are up-to-date and instances count is still one.
+ self.assertEqual(self.xform.instances.count(), 1)
+ new_media_file_size = os.path.getsize(media_file_path)
+ self.xform.refresh_from_db()
+ self.assertEqual(self.xform.attachment_storage_bytes, new_media_file_size)
+ user_profile.refresh_from_db()
+ self.assertEqual(user_profile.attachment_storage_bytes, new_media_file_size)
+ self.assertNotEqual(new_media_file_size, media_file_size)
+
+ instance = self.xform.instances.first()
+ attachment = instance.attachments.first()
+
+ # Validate previous attachment has been replaced but file still exists
+ soft_deleted_attachment_qs = Attachment.all_objects.filter(
+ instance=instance,
+ deleted_at__isnull=False
+ )
+ self.assertEqual(soft_deleted_attachment_qs.count(), 1)
+ soft_deleted_attachment = soft_deleted_attachment_qs.first()
+ self.assertEqual(
+ soft_deleted_attachment.media_file_basename, '1335783522563.jpg'
+ )
+ self.assertTrue(
+ soft_deleted_attachment.media_file.storage.exists(
+ str(soft_deleted_attachment.media_file)
+ )
+ )
+
+ # Validate that /api/v1/media endpoint returns the correct list
+ expected = {
+ 'url': f'http://testserver/api/v1/media/{attachment.pk}',
+ 'field_xpath': None,
+ 'download_url': attachment.secure_url(),
+ 'small_download_url': attachment.secure_url('small'),
+ 'medium_download_url': attachment.secure_url('medium'),
+ 'large_download_url': attachment.secure_url('large'),
+ 'id': attachment.pk,
+ 'xform': self.xform.pk,
+ 'instance': instance.pk,
+ 'mimetype': attachment.mimetype,
+ 'filename': attachment.media_file.name
+ }
+ request = self.factory.get('/', **self.extra)
+ response = self.list_view(request, pk=attachment.pk)
+ self.assertEqual(response.status_code, 200)
+ self.assertTrue(response.data[0], expected)
+ self.assertTrue(len(response.data), 1)
+
+ # Validate that /api/v1/data endpoint returns the correct attachment list
+ # Unfortunately, attachments list differ from /api/v1/media
+ expected = {
+ 'download_url': expected['download_url'],
+ 'download_small_url': expected['small_download_url'],
+ 'download_medium_url': expected['medium_download_url'],
+ 'download_large_url': expected['large_download_url'],
+ 'id': expected['id'],
+ 'xform': expected['xform'],
+ 'instance': expected['instance'],
+ 'mimetype': expected['mimetype'],
+ 'filename': expected['filename']
+ }
+
+ instance_response = self.client.get(
+ reverse(
+ 'data-detail',
+ kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
+ ),
+ format='json',
+ )
+ self.assertEqual(instance_response.data['_attachments'][0], expected)
+ self.assertEqual(len(instance_response.data['_attachments']), 1)
+
+ def test_storage_counters_still_accurate_on_hard_delete(self):
+ """
+ This test is not an API test, not really an Attachment unit test.
+ It is there to simplify the code base for attachment replacement.
+
+
+ """
+ self.test_update_attachment_on_edit()
+ self.xform.refresh_from_db()
+
+ instance = self.xform.instances.first()
+ user_profile = UserProfile.objects.get(user=self.xform.user)
+ self.assertEqual(self.xform.instances.count(), 1)
+ self.assertNotEqual(self.xform.attachment_storage_bytes, 0)
+ self.assertNotEqual(user_profile.attachment_storage_bytes, 0)
+
+ total_size = sum(
+ [
+ a.media_file_size
+ for a in Attachment.all_objects.filter(instance=instance)
+ ]
+ )
+
+ self.assertGreater(total_size, self.xform.attachment_storage_bytes)
+
+ # When deleting a submission, it (hard) deletes all attachments related
+ # to it, even the soft-deleted one.
+ self.client.delete(
+ reverse(
+ 'data-detail',
+ kwargs={'pk': self.xform.pk, 'dataid': instance.pk},
+ ),
+ format='json',
+ )
+ # Only not soft-deleted attachments should have been subtracted,
+ # and counters should be equal to 0
+ self.assertEqual(self.xform.instances.count(), 0)
+ self.xform.refresh_from_db()
+ self.assertEqual(self.xform.attachment_storage_bytes, 0)
+ user_profile.refresh_from_db()
+ self.assertEqual(user_profile.attachment_storage_bytes, 0)
+ self.assertFalse(
+ Attachment.all_objects.filter(instance=instance).exists()
+ )
diff --git a/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py b/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py
new file mode 100644
index 000000000..586bacfe0
--- /dev/null
+++ b/onadata/apps/logger/management/commands/soft_delete_orphan_attachments.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from django.core.management.base import BaseCommand
+
+from onadata.apps.logger.models import (
+ Attachment,
+ Instance,
+)
+from onadata.apps.logger.signals import pre_delete_attachment
+from onadata.libs.utils.logger_tools import get_soft_deleted_attachments
+
+
+class Command(BaseCommand):
+
+ help = "Soft delete orphan attachments, i.e: Hide them in API responses"
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ '--chunks',
+ type=int,
+ default=2000,
+ help="Number of records to process per query"
+ )
+
+ def handle(self, *args, **kwargs):
+ chunks = kwargs['chunks']
+ verbosity = kwargs['verbosity']
+
+ queryset = Attachment.objects.values_list('instance_id', flat=True).distinct()
+
+ if verbosity > 1:
+ self.stdout.write(
+ f'Calculating number of instance with attachments…'
+ )
+ instances_count = queryset.count()
+
+ cpt = 1
+
+ for instance_id in queryset.iterator(chunk_size=chunks):
+ instance = Instance.objects.get(pk=instance_id)
+ if verbosity > 0:
+ message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
+ self.stdout.write(
+ f'Processing Instance object #{instance_id}{message}…'
+ )
+ soft_deleted_attachments = get_soft_deleted_attachments(instance)
+ for soft_deleted_attachment in soft_deleted_attachments:
+ pre_delete_attachment(
+ soft_deleted_attachment, only_update_counters=True
+ )
+
+ cpt += 1
+ self.stdout.write('Done!')
diff --git a/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py b/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py
index 1718e88a3..ad7ff24bb 100644
--- a/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py
+++ b/onadata/apps/logger/management/commands/update_attachment_storage_bytes.py
@@ -125,7 +125,7 @@ def handle(self, *args, **kwargs):
)
# aggregate total media file size for all media per xform
form_attachments = Attachment.objects.filter(
- instance__xform_id=xform['pk']
+ instance__xform_id=xform['pk'],
).aggregate(total=Sum('media_file_size'))
if form_attachments['total']:
diff --git a/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py b/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py
new file mode 100644
index 000000000..d2addc60a
--- /dev/null
+++ b/onadata/apps/logger/migrations/0033_add_deleted_at_field_to_attachment.py
@@ -0,0 +1,20 @@
+# Generated by Django 3.2.15 on 2023-10-16 17:25
+
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('logger', '0032_alter_daily_submission_counter_user'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='attachment',
+ name='deleted_at',
+ field=models.DateTimeField(blank=True, db_index=True, null=True),
+ ),
+ ]
diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py
index 4a50d3c4f..e6dcf2852 100644
--- a/onadata/apps/logger/models/attachment.py
+++ b/onadata/apps/logger/models/attachment.py
@@ -28,6 +28,12 @@ def hash_attachment_contents(contents):
return get_hash(contents)
+class AttachmentDefaultManager(models.Manager):
+
+ def get_queryset(self):
+ return super().get_queryset().filter(deleted_at__isnull=True)
+
+
class Attachment(models.Model):
instance = models.ForeignKey(
Instance, related_name='attachments', on_delete=models.CASCADE
@@ -40,6 +46,10 @@ class Attachment(models.Model):
media_file_size = models.PositiveIntegerField(blank=True, null=True)
mimetype = models.CharField(
max_length=100, null=False, blank=True, default='')
+ deleted_at = models.DateTimeField(blank=True, null=True, db_index=True)
+
+ objects = AttachmentDefaultManager()
+ all_objects = models.Manager()
class Meta:
app_label = 'logger'
diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py
index 19437d838..1ac617449 100644
--- a/onadata/apps/logger/models/instance.py
+++ b/onadata/apps/logger/models/instance.py
@@ -54,7 +54,7 @@
def get_id_string_from_xml_str(xml_str):
xml_obj = clean_and_parse_xml(xml_str)
root_node = xml_obj.documentElement
- id_string = root_node.getAttribute("id")
+ id_string = root_node.getAttribute('id')
if len(id_string) == 0:
# may be hidden in submission/data/id_string
diff --git a/onadata/apps/logger/models/xform.py b/onadata/apps/logger/models/xform.py
index 2d075936a..146a31e9a 100644
--- a/onadata/apps/logger/models/xform.py
+++ b/onadata/apps/logger/models/xform.py
@@ -132,7 +132,7 @@ def url(self):
def data_dictionary(self):
from onadata.apps.viewer.models.data_dictionary import\
DataDictionary
- return DataDictionary.objects.get(pk=self.pk)
+ return DataDictionary.all_objects.get(pk=self.pk)
@property
def has_instances_with_geopoints(self):
diff --git a/onadata/apps/logger/signals.py b/onadata/apps/logger/signals.py
index 263e6cf62..e8941710f 100644
--- a/onadata/apps/logger/signals.py
+++ b/onadata/apps/logger/signals.py
@@ -31,10 +31,10 @@ def pre_delete_attachment(instance, **kwargs):
# change the name of the parameter
attachment = instance
file_size = attachment.media_file_size
-
+ only_update_counters = kwargs.pop('only_update_counters', False)
xform = attachment.instance.xform
- if file_size:
+ if file_size and attachment.deleted_at is None:
with transaction.atomic():
"""
Update both counters at the same time (in a transaction) to avoid
@@ -49,7 +49,7 @@ def pre_delete_attachment(instance, **kwargs):
attachment_storage_bytes=F('attachment_storage_bytes') - file_size
)
- if not (media_file_name := str(attachment.media_file)):
+ if only_update_counters or not (media_file_name := str(attachment.media_file)):
return
# Clean-up storage
diff --git a/onadata/apps/logger/xform_instance_parser.py b/onadata/apps/logger/xform_instance_parser.py
index 45da8997f..cea69d0ba 100644
--- a/onadata/apps/logger/xform_instance_parser.py
+++ b/onadata/apps/logger/xform_instance_parser.py
@@ -119,20 +119,22 @@ def get_deprecated_uuid_from_xml(xml):
return None
-def clean_and_parse_xml(xml_string):
+def clean_and_parse_xml(xml_string: str) -> Node:
clean_xml_str = xml_string.strip()
- clean_xml_str = re.sub(r">\s+<", "><", smart_str(clean_xml_str))
+ clean_xml_str = re.sub(r'>\s+<', '><', smart_str(clean_xml_str))
xml_obj = minidom.parseString(clean_xml_str)
return xml_obj
-def _xml_node_to_dict(node, repeats=[]):
+def _xml_node_to_dict(node: Node, repeats: list = []) -> dict:
assert isinstance(node, Node)
if len(node.childNodes) == 0:
# there's no data for this leaf node
return None
- elif len(node.childNodes) == 1 and \
- node.childNodes[0].nodeType == node.TEXT_NODE:
+ elif (
+ len(node.childNodes) == 1
+ and node.childNodes[0].nodeType == node.TEXT_NODE
+ ):
# there is data for this leaf node
return {node.nodeName: node.childNodes[0].nodeValue}
else:
@@ -329,9 +331,10 @@ def _set_attributes(self):
try:
assert key not in self._attributes
except AssertionError:
- logger = logging.getLogger("console_logger")
- logger.debug("Skipping duplicate attribute: %s"
- " with value %s" % (key, value))
+ logger = logging.getLogger('console_logger')
+ #logger.debug(
+ # f'Skipping duplicate attribute: {key} with value {value}'
+ #)
else:
self._attributes[key] = value
@@ -357,3 +360,44 @@ def xform_instance_to_flat_dict(xml_str, data_dictionary):
def parse_xform_instance(xml_str, data_dictionary):
parser = XFormInstanceParser(xml_str, data_dictionary)
return parser.get_flat_dict_with_attributes()
+
+
+def get_xform_media_question_xpaths(
+ xform: 'onadata.apps.logger.models.XForm',
+) -> list:
+ logger = logging.getLogger('console_logger')
+ parser = XFormInstanceParser(xform.xml, xform.data_dictionary())
+ all_attributes = _get_all_attributes(parser.get_root_node())
+ media_field_xpaths = []
+
+ # This code expects that the attributes from Enketo Express are **always**
+ # sent in the same order.
+ # For example:
+ #
+ # `ref` attribute should always come right after `mediatype`
+ for (key, value) in all_attributes:
+ if key.lower() == 'mediatype':
+ try:
+ next_attribute = next(all_attributes)
+ except StopIteration:
+ logger.error(
+ f'`ref` attribute seems to be missing in {xform.xml}',
+ exc_info=True,
+ )
+ continue
+
+ next_attribute_key, next_attribute_value = next_attribute
+ try:
+ assert next_attribute_key.lower() == 'ref'
+ except AssertionError:
+ logger = logging.getLogger('console_logger')
+ logger.error(
+ f'`ref` should come after `mediatype:{value}` in {xform.xml}',
+ exc_info=True,
+ )
+ continue
+
+ # We are returning XPaths, leading slash should be removed
+ media_field_xpaths.append(next_attribute_value[1:])
+
+ return media_field_xpaths
diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg
new file mode 100755
index 000000000..e8d953e38
Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/1335783522563.jpg differ
diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG
new file mode 100644
index 000000000..ef7476d31
Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/IMG_2235.JPG differ
diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml
new file mode 100755
index 000000000..daa901514
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment.xml
@@ -0,0 +1,23 @@
+
+
+
+ none
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1335783522563.jpg
+
+ uuid:5b2cc313-fc09-437e-8149-fcd32f695d41
+
+
diff --git a/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml
new file mode 100755
index 000000000..d914a12f0
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/transportation/instances/transport_with_attachment/transport_with_attachment_edit.xml
@@ -0,0 +1,24 @@
+
+
+
+ none
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IMG_2235.JPG
+
+ uuid:30362a89-73f2-4b64-8838-867c2bd3ea35
+ uuid:5b2cc313-fc09-437e-8149-fcd32f695d41
+
+
diff --git a/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls b/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls
new file mode 100644
index 000000000..b44a43efe
Binary files /dev/null and b/onadata/apps/main/tests/fixtures/transportation/transportation_with_attachment.xls differ
diff --git a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls
index feb40833c..7afb20942 100644
Binary files a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls and b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xls differ
diff --git a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml
index 57b192afa..2c72706c8 100644
--- a/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml
+++ b/onadata/apps/main/tests/fixtures/userone/userone_with_dot_name_fields.xml
@@ -1 +1,16 @@
-b891c5f8bf4f4e3bb1bd540a858c7c88Office4-1.2624975 36.7923384 0.0 25.0Cooluuid:a32f232c-77cb-4468-b55b-6495d5e5de7
+
+
+
+ b891c5f8bf4f4e3bb1bd540a858c7c88
+
+ Office
+ 4
+
+ -1.2624975 36.7923384 0.0 25.0
+
+ Cool
+
+
+ uuid:a32f232c-77cb-4468-b55b-6495d5e5de7
+
+
diff --git a/onadata/apps/main/tests/test_csv_export.py b/onadata/apps/main/tests/test_csv_export.py
index 49a11e63b..5f0fb09d6 100644
--- a/onadata/apps/main/tests/test_csv_export.py
+++ b/onadata/apps/main/tests/test_csv_export.py
@@ -68,22 +68,41 @@ def test_csv_nested_repeat_output(self):
self.assertEqual(actual_content, expected_content)
def test_dotted_fields_csv_export_output(self):
- path = os.path.join(os.path.dirname(__file__), 'fixtures', 'userone',
- 'userone_with_dot_name_fields.xls')
+ path = os.path.join(
+ os.path.dirname(__file__),
+ 'fixtures',
+ 'userone',
+ 'userone_with_dot_name_fields.xls',
+ )
self._publish_xls_file_and_set_xform(path)
- path = os.path.join(os.path.dirname(__file__), 'fixtures', 'userone',
- 'userone_with_dot_name_fields.xml')
+ path = os.path.join(
+ os.path.dirname(__file__),
+ 'fixtures',
+ 'userone',
+ 'userone_with_dot_name_fields.xml',
+ )
self._make_submission(
- path, forced_submission_time=self._submission_time)
+ path, forced_submission_time=self._submission_time
+ )
# test csv
- export = generate_export(Export.CSV_EXPORT, 'csv', self.user.username,
- 'userone')
+ export = generate_export(
+ Export.CSV_EXPORT,
+ 'csv',
+ self.user.username,
+ 'userone_with_dot_name_fields',
+ )
self.assertTrue(default_storage.exists(export.filepath))
path, ext = os.path.splitext(export.filename)
self.assertEqual(ext, '.csv')
- with open(os.path.join(
- os.path.dirname(__file__), 'fixtures', 'userone',
- 'userone_with_dot_name_fields.csv'), 'rb') as f1:
+ with open(
+ os.path.join(
+ os.path.dirname(__file__),
+ 'fixtures',
+ 'userone',
+ 'userone_with_dot_name_fields.csv',
+ ),
+ 'rb',
+ ) as f1:
with default_storage.open(export.filepath) as f2:
expected_content = f1.read()
actual_content = f2.read()
diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py
index 1047a6712..c04eb237d 100644
--- a/onadata/apps/viewer/views.py
+++ b/onadata/apps/viewer/views.py
@@ -270,6 +270,10 @@ def attachment_url(request, size='medium'):
media_file_logger.info('attachment not found')
return HttpResponseNotFound(t('Attachment not found'))
+ # Attachment has a deleted date, it should not be shown anymore
+ if attachment.deleted_at:
+ return HttpResponseNotFound(_('Attachment not found'))
+
# Checks whether users are allowed to see the media file before giving them
# the url
xform = attachment.instance.xform
diff --git a/onadata/libs/serializers/attachment_serializer.py b/onadata/libs/serializers/attachment_serializer.py
index 718f54b5b..1c4752912 100644
--- a/onadata/libs/serializers/attachment_serializer.py
+++ b/onadata/libs/serializers/attachment_serializer.py
@@ -1,9 +1,9 @@
# coding: utf-8
+import json
+
from rest_framework import serializers
from onadata.apps.logger.models.attachment import Attachment
from onadata.libs.utils.decorators import check_obj
-from onadata.libs.utils.image_tools import image_url
-import json
def dict_key_for_value(_dict, value):
@@ -13,7 +13,10 @@ def dict_key_for_value(_dict, value):
return list(_dict.keys())[list(_dict.values()).index(value)]
-def get_path(data, question_name, path_list=[]):
+def get_path(data, question_name, path_list=None):
+ if path_list is None:
+ path_list = []
+
name = data.get('name')
if name == question_name:
return '/'.join(path_list)
diff --git a/onadata/libs/tests/mixins/make_submission_mixin.py b/onadata/libs/tests/mixins/make_submission_mixin.py
index 55be147dd..269578d48 100644
--- a/onadata/libs/tests/mixins/make_submission_mixin.py
+++ b/onadata/libs/tests/mixins/make_submission_mixin.py
@@ -4,8 +4,10 @@
from tempfile import NamedTemporaryFile
from typing import Union
+from django.conf import settings
from django.contrib.auth import authenticate
from django_digest.test import DigestAuth
+from kobo_service_account.utils import get_request_headers
from rest_framework.test import APIRequestFactory
from onadata.apps.api.viewsets.xform_submission_api import XFormSubmissionApi
@@ -43,6 +45,7 @@ def _make_submission(
forced_submission_time: bool = None,
auth: Union[DigestAuth, bool] = None,
media_file: 'io.BufferedReader' = None,
+ use_service_account: bool = False,
):
"""
Pass `auth=False` for an anonymous request, or omit `auth` to perform
@@ -50,9 +53,17 @@ def _make_submission(
"""
# store temporary file with dynamic uuid
self.factory = APIRequestFactory()
- if auth is None:
+
+ if auth is None and not use_service_account:
auth = DigestAuth('bob', 'bob')
+ extras = {}
+ if use_service_account:
+ extras = self.get_meta_from_headers(
+ get_request_headers(self.user.username)
+ )
+ extras['HTTP_HOST'] = settings.TEST_HTTP_HOST
+
if add_uuid:
path = self._add_uuid_to_submission_xml(path, self.xform)
@@ -67,7 +78,7 @@ def _make_submission(
url_prefix = f'{username}/' if username else ''
url = f'/{url_prefix}submission'
- request = self.factory.post(url, post_data)
+ request = self.factory.post(url, post_data, **extras)
if auth:
request.user = authenticate(username=auth.username,
password=auth.password)
diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py
index 783b053b3..ba915b150 100644
--- a/onadata/libs/utils/logger_tools.py
+++ b/onadata/libs/utils/logger_tools.py
@@ -1,11 +1,13 @@
# coding: utf-8
from __future__ import annotations
+import logging
import os
import re
import sys
import traceback
from datetime import date, datetime
+from xml.etree import ElementTree as ET
from xml.parsers.expat import ExpatError
try:
from zoneinfo import ZoneInfo
@@ -55,7 +57,11 @@
update_xform_submission_count,
)
from onadata.apps.logger.models.xform import XLSFormError
-from onadata.apps.logger.signals import post_save_attachment
+from onadata.apps.logger.signals import (
+ post_save_attachment,
+ pre_delete_attachment,
+)
+
from onadata.apps.logger.xform_instance_parser import (
InstanceEmptyError,
InstanceInvalidUserError,
@@ -65,6 +71,7 @@
get_uuid_from_xml,
get_deprecated_uuid_from_xml,
get_submission_date_from_xml,
+ get_xform_media_question_xpaths,
)
from onadata.apps.main.models import UserProfile
from onadata.apps.viewer.models.data_dictionary import DataDictionary
@@ -180,7 +187,7 @@ def create_instance(
if existing_instance:
existing_instance.check_active(force=False)
# ensure we have saved the extra attachments
- new_attachments = save_attachments(existing_instance, media_files)
+ new_attachments, _ = save_attachments(existing_instance, media_files)
if not new_attachments:
raise DuplicateInstance()
else:
@@ -261,8 +268,9 @@ def get_xform_from_submission(xml, username, uuid=None):
id_string = get_id_string_from_xml_str(xml)
- return get_object_or_404(XForm, id_string__exact=id_string,
- user__username=username)
+ return get_object_or_404(
+ XForm, id_string__exact=id_string, user__username=username
+ )
def inject_instanceid(xml_str, uuid):
@@ -564,10 +572,11 @@ def save_attachments(
instance: Instance,
media_files: list['django.core.files.uploadedfile.UploadedFile'],
defer_counting: bool = False,
-) -> list[Attachment]:
+) -> tuple[list[Attachment], list[Attachment]]:
"""
- Returns `True` if any new attachment was saved, `False` if all attachments
- were duplicates or none were provided.
+ Return a tuple of two lists.
+ - The former is new attachments
+ - The latter is the replaced/soft-deleted attachments
`defer_counting=False` will set a Python-only attribute of the same name on
any *new* `Attachment` instances created. This will prevent
@@ -598,7 +607,9 @@ def save_attachments(
new_attachment.save()
new_attachments.append(new_attachment)
- return new_attachments
+ soft_deleted_attachments = get_soft_deleted_attachments(instance)
+
+ return new_attachments, soft_deleted_attachments
def save_submission(
@@ -628,10 +639,11 @@ def save_submission(
# attribute set to `True` *if* a new instance was created. We are
# responsible for calling `update_xform_submission_count()` if the returned
# `Instance` has `defer_counting = True`.
- instance = _get_instance(request, xml, new_uuid, status, xform,
- defer_counting=True)
+ instance = _get_instance(
+ request, xml, new_uuid, status, xform, defer_counting=True
+ )
- new_attachments = save_attachments(
+ new_attachments, soft_deleted_attachments = save_attachments(
instance, media_files, defer_counting=True
)
@@ -675,9 +687,61 @@ def save_submission(
del new_attachment.defer_counting
post_save_attachment(new_attachment, created=True)
+ for soft_deleted_attachment in soft_deleted_attachments:
+ pre_delete_attachment(soft_deleted_attachment, only_update_counters=True)
+
return instance
+def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:
+ """
+ Soft delete replaced attachments when editing a submission
+ """
+ # Retrieve all media questions of Xform
+ media_question_xpaths = get_xform_media_question_xpaths(instance.xform)
+
+ # If XForm does not have any media fields, do not go further
+ if not media_question_xpaths:
+ return []
+
+ # Parse instance XML to get the basename of each file of the updated
+ # submission
+ xml_parsed = ET.fromstring(instance.xml)
+ basenames = []
+
+ for media_question_xpath in media_question_xpaths:
+ root_name, xpath_without_root = media_question_xpath.split('/', 1)
+ try:
+ assert root_name == xml_parsed.tag
+ except AssertionError:
+ logging.warning(
+ 'Instance XML root tag name does not match with its form'
+ )
+
+ # With repeat groups, several nodes can have the same XPath. We
+ # need to retrieve all of them
+ questions = xml_parsed.findall(xpath_without_root)
+ for question in questions:
+ try:
+ basename = question.text
+ except AttributeError:
+ raise XPathNotFoundException
+
+ # Only keep non-empty fields
+ if basename:
+ basenames.append(basename)
+
+ # Update Attachment objects to hide them if they are not used anymore.
+ # We do not want to delete them until the instance itself is deleted.
+ queryset = Attachment.objects.filter(
+ instance=instance
+ ).exclude(media_file_basename__in=basenames)
+ soft_deleted_attachments = list(queryset.all())
+ queryset.update(deleted_at=timezone.now())
+
+ return soft_deleted_attachments
+
+
def _get_instance(
request: 'rest_framework.request.Request',
xml: str,
@@ -857,3 +921,7 @@ class UnauthenticatedEditAttempt(Exception):
then returns the appropriate 401 response.
"""
pass
+
+
+class XPathNotFoundException(Exception):
+ pass