Skip to content

Commit a138321

Browse files
committed
✨(backend) extract attachment keys from updated content for access
We can't prevent document editors from copy/pasting content to from one document to another. The problem is that copying content, will copy the urls pointing to attachments but if we don't do anything, the reader of the document to which the content is being pasted, may not be allowed to access the attachment files from the original document. Using the work from the previous commit, we can grant access to the readers of the target document by extracting the attachment keys from the content and adding themto the target document's "attachments" field. Before doing this, we check that the current user can indeed access the attachment files extracted from the content and that they are allowed to edit the current document.
1 parent 71f2aad commit a138321

File tree

5 files changed

+190
-5
lines changed

5 files changed

+190
-5
lines changed

src/backend/core/api/serializers.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import magic
1111
from rest_framework import exceptions, serializers
1212

13-
from core import enums, models
13+
from core import enums, models, utils
1414
from core.services.ai_services import AI_ACTIONS
1515
from core.services.converter_services import (
1616
ConversionError,
@@ -231,6 +231,36 @@ def validate_id(self, value):
231231

232232
return value
233233

234+
def save(self, **kwargs):
235+
"""
236+
Process the content field to extract attachment keys and update the document's
237+
"attachments" field for access control.
238+
"""
239+
content = self.validated_data.get("content", "")
240+
extracted_attachments = set(utils.extract_attachments(content))
241+
242+
existing_attachments = (
243+
set(self.instance.attachments or []) if self.instance else set()
244+
)
245+
new_attachments = extracted_attachments - existing_attachments
246+
247+
if new_attachments:
248+
user = self.context["request"].user
249+
readable_documents = models.Document.objects.readable(user).filter(
250+
Q(attachments__overlap=list(new_attachments))
251+
)
252+
253+
readable_attachments = set()
254+
for document in readable_documents:
255+
readable_attachments.update(set(document.attachments) & new_attachments)
256+
257+
# Update attachments with readable keys
258+
self.validated_data["attachments"] = list(
259+
existing_attachments | readable_attachments
260+
)
261+
262+
return super().save(**kwargs)
263+
234264

235265
class ServerCreateDocumentSerializer(serializers.Serializer):
236266
"""

src/backend/core/tests/documents/test_api_documents_media_auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Test file uploads API endpoint for users in impress's core app.
2+
Test media-auth authorization API endpoint in docs core app.
33
"""
44

55
from io import BytesIO
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
"""
2+
Test extract-attachments on document update in docs core app.
3+
"""
4+
5+
import base64
6+
from uuid import uuid4
7+
8+
import pytest
9+
import y_py
10+
from rest_framework.test import APIClient
11+
12+
from core import factories
13+
14+
pytestmark = pytest.mark.django_db
15+
16+
17+
def get_ydoc_with_mages(image_keys):
18+
"""Return a ydoc from text for testing purposes."""
19+
ydoc = y_py.YDoc() # pylint: disable=no-member
20+
with ydoc.begin_transaction() as txn:
21+
xml_fragment = ydoc.get_xml_element("document-store")
22+
for key in image_keys:
23+
xml_image = xml_fragment.push_xml_element(txn, "image")
24+
xml_image.set_attribute(txn, "src", f"http://localhost/media/{key:s}")
25+
26+
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
27+
return base64.b64encode(update).decode("utf-8")
28+
29+
30+
def test_api_documents_update_new_attachment_keys_anonymous(django_assert_num_queries):
31+
"""
32+
When an anonymous user updates a document, the attachment keys extracted from the
33+
updated content should be added to the list of "attachments" ot the document if these
34+
attachments are already readable by anonymous users.
35+
"""
36+
image_keys = [f"{uuid4()!s}/attachments/{uuid4()!s}.png" for _ in range(4)]
37+
document = factories.DocumentFactory(
38+
content=get_ydoc_with_mages(image_keys[:1]),
39+
attachments=[image_keys[0]],
40+
link_reach="public",
41+
link_role="editor",
42+
)
43+
44+
factories.DocumentFactory(attachments=[image_keys[1]], link_reach="public")
45+
factories.DocumentFactory(attachments=[image_keys[2]], link_reach="authenticated")
46+
factories.DocumentFactory(attachments=[image_keys[3]], link_reach="restricted")
47+
expected_keys = {image_keys[i] for i in [0, 1]}
48+
49+
with django_assert_num_queries(4):
50+
response = APIClient().put(
51+
f"/api/v1.0/documents/{document.id!s}/",
52+
{"content": get_ydoc_with_mages(image_keys)},
53+
format="json",
54+
)
55+
assert response.status_code == 200
56+
57+
document.refresh_from_db()
58+
assert set(document.attachments) == expected_keys
59+
60+
# Check that the db query to check attachments readability for extracted
61+
# keys is not done if the content changes but no new keys are found
62+
with django_assert_num_queries(3):
63+
response = APIClient().put(
64+
f"/api/v1.0/documents/{document.id!s}/",
65+
{"content": get_ydoc_with_mages(image_keys[:2])},
66+
format="json",
67+
)
68+
assert response.status_code == 200
69+
70+
document.refresh_from_db()
71+
assert len(document.attachments) == 2
72+
assert set(document.attachments) == expected_keys
73+
74+
75+
def test_api_documents_update_new_attachment_keys_authenticated(
76+
django_assert_num_queries,
77+
):
78+
"""
79+
When an authenticated user updates a document, the attachment keys extracted from the
80+
updated content should be added to the list of "attachments" ot the document if these
81+
attachments are already readable by the editing user.
82+
"""
83+
user = factories.UserFactory()
84+
client = APIClient()
85+
client.force_login(user)
86+
87+
image_keys = [f"{uuid4()!s}/attachments/{uuid4()!s}.png" for _ in range(5)]
88+
document = factories.DocumentFactory(
89+
content=get_ydoc_with_mages(image_keys[:1]),
90+
attachments=[image_keys[0]],
91+
users=[(user, "editor")],
92+
)
93+
94+
factories.DocumentFactory(attachments=[image_keys[1]], link_reach="public")
95+
factories.DocumentFactory(attachments=[image_keys[2]], link_reach="authenticated")
96+
factories.DocumentFactory(attachments=[image_keys[3]], link_reach="restricted")
97+
factories.DocumentFactory(attachments=[image_keys[4]], users=[user])
98+
expected_keys = {image_keys[i] for i in [0, 1, 2, 4]}
99+
100+
with django_assert_num_queries(5):
101+
response = client.put(
102+
f"/api/v1.0/documents/{document.id!s}/",
103+
{"content": get_ydoc_with_mages(image_keys)},
104+
format="json",
105+
)
106+
assert response.status_code == 200
107+
108+
document.refresh_from_db()
109+
assert set(document.attachments) == expected_keys
110+
111+
# Check that the db query to check attachments readability for extracted
112+
# keys is not done if the content changes but no new keys are found
113+
with django_assert_num_queries(4):
114+
response = client.put(
115+
f"/api/v1.0/documents/{document.id!s}/",
116+
{"content": get_ydoc_with_mages(image_keys[:2])},
117+
format="json",
118+
)
119+
assert response.status_code == 200
120+
121+
document.refresh_from_db()
122+
assert len(document.attachments) == 4
123+
assert set(document.attachments) == expected_keys
124+
125+
126+
def test_api_documents_update_new_attachment_keys_duplicate():
127+
"""
128+
Duplicate keys in the content should not result in duplicates in the document's attachments.
129+
"""
130+
user = factories.UserFactory()
131+
client = APIClient()
132+
client.force_login(user)
133+
134+
image_key1 = f"{uuid4()!s}/attachments/{uuid4()!s}.png"
135+
image_key2 = f"{uuid4()!s}/attachments/{uuid4()!s}.png"
136+
document = factories.DocumentFactory(
137+
content=get_ydoc_with_mages([image_key1]),
138+
attachments=[image_key1],
139+
users=[(user, "editor")],
140+
)
141+
142+
factories.DocumentFactory(attachments=[image_key2], users=[user])
143+
144+
response = client.put(
145+
f"/api/v1.0/documents/{document.id!s}/",
146+
{"content": get_ydoc_with_mages([image_key1, image_key2, image_key2])},
147+
format="json",
148+
)
149+
assert response.status_code == 200
150+
151+
document.refresh_from_db()
152+
assert len(document.attachments) == 2
153+
assert set(document.attachments) == {image_key1, image_key2}

src/backend/core/tests/test_utils_base64_yjs_to_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import y_py
77

8-
from core import models, utils
8+
from core import utils
99
from core.utils import base64_yjs_to_text
1010

1111

src/backend/core/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ def base64_yjs_to_text(base64_string):
2828
return soup.get_text(separator=" ").strip()
2929

3030

31-
def extract_attachments(document):
31+
def extract_attachments(content):
3232
"""Helper method to extract media paths from a document's content."""
33-
xml_content = base64_yjs_to_xml(document.content)
33+
if not content:
34+
return []
35+
xml_content = base64_yjs_to_xml(content)
3436
return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content)

0 commit comments

Comments
 (0)