Skip to content

Commit f654039

Browse files
authored
fix: don't wrap HTML data with newlines when serializing for LC (#35532)
When serializing to OLX, the Learning Core runtime wraps HTML content in CDATA to avoid having to escape every individual `<`, `>`, and `&`. The runtime also puts newlines around the content within the CDATA, So, given HTML content `...`, we get `<![CDATA[\n...\n]]>`. The problem is that every time you serialize an HTML block to OLX, it adds another pair of newlines. These newlines aren't visible to the end users, but they do make it so that importing and exporting content never reached a stable, aka "canonical" form. It also makes unit testing difficult, because the value of `html_block.data` becomes a moving target. We do not believe these newlines are necessary, so we have removed them from the `CDATA` block, and added a unit test to ensure that HTML blocks having a canonical serialization. Closes: #35525
1 parent 9ae65bb commit f654039

File tree

3 files changed

+84
-2
lines changed

3 files changed

+84
-2
lines changed

openedx/core/djangoapps/content_libraries/tests/test_runtime.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
import json
55
from gettext import GNUTranslations
6+
from django.test import TestCase
67

78
from completion.test_utils import CompletionWaffleTestMixin
89
from django.db import connections, transaction
@@ -24,6 +25,7 @@
2425
from openedx.core.djangoapps.dark_lang.models import DarkLangConfig
2526
from openedx.core.djangoapps.xblock import api as xblock_api
2627
from openedx.core.djangolib.testing.utils import skip_unless_lms, skip_unless_cms
28+
from openedx.core.lib.xblock_serializer import api as serializer_api
2729
from common.djangoapps.student.tests.factories import UserFactory
2830

2931

@@ -59,6 +61,86 @@ def setUp(self):
5961
)
6062

6163

64+
@skip_unless_cms
65+
class ContentLibraryOlxTests(ContentLibraryContentTestMixin, TestCase):
66+
"""
67+
Basic test of the Learning-Core-based XBlock serialization-deserialization, using XBlocks in a content library.
68+
"""
69+
70+
def test_html_round_trip(self):
71+
"""
72+
Test that if we deserialize and serialize an HTMLBlock repeatedly, two things hold true:
73+
74+
1. Even if the OLX changes format, the inner content does not change format.
75+
2. The OLX settles into a stable state after 1 round trip.
76+
77+
(We are particularly testing HTML, but it would be good to confirm that these principles hold true for
78+
XBlocks in general.)
79+
"""
80+
usage_key = library_api.create_library_block(self.library.key, "html", "roundtrip").usage_key
81+
82+
# The block's actual HTML has some extraneous spaces and newlines, as well as comment.
83+
# We expect this to be preserved through the round-trips.
84+
block_content = '''\
85+
<div class="i-like-double-quotes">
86+
<div class='i-like-single-quotes'>
87+
<p> There is a space on either side of this sentence. </p>
88+
<p>\tThere is a tab on either side of this sentence.\t<p>
89+
<p>🙃There is an emoji on either side of this sentence.🙂</p>
90+
<p>There is nothing on either side of this sentence.</p>
91+
</div>
92+
<p><![CDATA[ This is an inner CDATA 🤯 Technically illegal in HTML, but let's test it anyway. ?!&<>\t ]]&gt;</p>
93+
<!-- This is a comment within the HTML. -->
94+
</div>'''
95+
96+
# The OLX containing the HTML also has some extraneous stuff, which do *not* expect to survive the round-trip.
97+
olx_1 = f'''\
98+
99+
<html
100+
display_name="Round Trip Test HTML Block"
101+
some_fake_field="some fake value"
102+
><![CDATA[{block_content}]]><!--
103+
I am an OLX comment.
104+
--></html>'''
105+
106+
# Here is what we expect the OLX to settle down to. Notable changes:
107+
# * url_name is added.
108+
# * some_fake_field is gone.
109+
# * The OLX comment is gone.
110+
# * A trailing newline is added at the end of the export.
111+
# DEVS: If you are purposefully tweaking the formatting of the xblock serializer, then it's fine to
112+
# update the value of this variable, as long as:
113+
# 1. the {block_content} remains unchanged, and
114+
# 2. the canonical_olx remains stable through the 2nd round trip.
115+
canonical_olx = (
116+
f'<html url_name="roundtrip" display_name="Round Trip Test HTML Block"><![CDATA[{block_content}]]></html>\n'
117+
)
118+
119+
# Save the block to LC, and re-load it.
120+
library_api.set_library_block_olx(usage_key, olx_1)
121+
library_api.publish_changes(self.library.key)
122+
block_saved_1 = xblock_api.load_block(usage_key, self.staff_user)
123+
124+
# Content should be preserved...
125+
assert block_saved_1.data == block_content
126+
127+
# ...but the serialized OLX will have changed to match the 'canonical' OLX.
128+
olx_2 = serializer_api.serialize_xblock_to_olx(block_saved_1).olx_str
129+
assert olx_2 == canonical_olx
130+
131+
# Now, save that OLX back to LC, and re-load it again.
132+
library_api.set_library_block_olx(usage_key, olx_2)
133+
library_api.publish_changes(self.library.key)
134+
block_saved_2 = xblock_api.load_block(usage_key, self.staff_user)
135+
136+
# Again, content should be preserved...
137+
assert block_saved_2.data == block_saved_1.data == block_content
138+
139+
# ...and this time, the OLX should have settled too.
140+
olx_3 = serializer_api.serialize_xblock_to_olx(block_saved_2).olx_str
141+
assert olx_3 == olx_2 == canonical_olx
142+
143+
62144
class ContentLibraryRuntimeTests(ContentLibraryContentTestMixin):
63145
"""
64146
Basic tests of the Learning-Core-based XBlock runtime using XBlocks in a

openedx/core/djangoapps/content_staging/tests/test_clipboard.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_copy_html(self):
159159
<html url_name="toyhtml" display_name="Text"><![CDATA[
160160
<a href='/static/handouts/sample_handout.txt'>Sample</a>
161161
]]></html>
162-
""").lstrip()
162+
""").replace("\n", "") + "\n" # No newlines, expect one trailing newline.
163163

164164
# Now if we GET the clipboard again, the GET response should exactly equal the last POST response:
165165
assert client.get(CLIPBOARD_ENDPOINT).json() == response_data

openedx/core/lib/xblock_serializer/block_serializer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _serialize_html_block(self, block) -> etree.Element:
133133

134134
# Escape any CDATA special chars
135135
escaped_block_data = block.data.replace("]]>", "]]&gt;")
136-
olx_node.text = etree.CDATA("\n" + escaped_block_data + "\n")
136+
olx_node.text = etree.CDATA(escaped_block_data)
137137
return olx_node
138138

139139

0 commit comments

Comments
 (0)