Skip to content

Commit

Permalink
more detailed encoding checks
Browse files (browse the repository at this point in the history)
  • Loading branch information
rbri committed Jul 31, 2024
1 parent 857076e commit 9a72f0b
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 85 deletions.
3 changes: 3 additions & 0 deletions checkstyle_suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@
<suppress checks="LineLength" files="XMLSerializer.java" lines="1-30"/>
<suppress checks="LineLength" files="XMLSerializerTest.java"/>
<suppress checks="LineLength" files="XSLProcessorTest.java"/>
<suppress checks="LineLength" files="XMLHttpRequestResponseAsTextEncodingTest.java"/>
<suppress checks="LineLength" files="XMLHttpRequestResponseAsXMLEncodingTest.java"/>
<suppress checks="LineLength" files="XMLHttpRequestResponseTextEncodingTest.java"/>
<suppress checks="LineLength" files="XMLHttpRequestResponseXMLEncodingTest.java"/>

<suppress checks="MethodName" files="HtmlPageEncodingTest.java"/>
<suppress checks="MethodName" files="CssStyleSheetEncodingTest.java"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.lang3.ArrayUtils;
Expand All @@ -44,6 +45,8 @@ public enum TestCharset {
UTF8("UTF8", UTF_8),
/** iso 8859 1. */
ISO88591("ISO88591", ISO_8859_1),
/** windows-1250. */
WINDOWS1250("WINDOWS1250", Charset.forName("windows-1250")),
/** gb 2312. */
GB2312("GB2312", Charset.forName("GB2312"));

Expand Down Expand Up @@ -114,4 +117,12 @@ protected void setupXmlResponse(final String xml, final String bom, final TestMi
getMockWebConnection().setResponse(URL_SECOND, xml, mimeTypeXml.getMimeType(),
charsetXmlResponseHeader == null ? null : charsetXmlResponseHeader.getCharset());
}

/**
 * Converts every UTF-16 code unit of the given string into its
 * backslash-u notation: a backslash, the letter {@code u} and exactly
 * four lowercase hexadecimal digits.
 *
 * @param str the string to convert; must not be {@code null}
 * @return the concatenated escape sequences, empty if {@code str} is empty
 */
protected static String escape(final String str) {
    final int length = str.length();
    // each code unit becomes exactly six characters
    final StringBuilder escaped = new StringBuilder(length * 6);
    for (int i = 0; i < length; i++) {
        final String hex = Integer.toHexString(str.charAt(i));
        escaped.append("\\u");
        // left-pad with zeros up to four digits
        for (int pad = hex.length(); pad < 4; pad++) {
            escaped.append('0');
        }
        escaped.append(hex);
    }
    return escaped.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ public static Collection<Object[]> data() throws Exception {

final String[] xmlEncodingHeaders = {"", "utf8"};
final TestCharset[] charsetHtmlResponseHeaders =
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.GB2312};
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.WINDOWS1250, TestCharset.GB2312};
final TestMimeType[] mimeTypeXmls = {TestMimeType.EMPTY, TestMimeType.XML, TestMimeType.PLAIN};
final TestCharset[] charsetXmlResponseHeaders =
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.GB2312};
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.WINDOWS1250, TestCharset.GB2312};
final String[] boms = {null, BOM_UTF_8, BOM_UTF_16LE, BOM_UTF_16BE};

for (final Object xmlEncodingHeader : xmlEncodingHeaders) {
Expand Down Expand Up @@ -139,13 +139,23 @@ private void responseText(
+ " <head>\n"
+ " <script>\n"
+ LOG_TEXTAREA_FUNCTION

+ " function unicodeEscape(str) {\n"
+ " let result = '', index = 0, charCode, escape;\n"
+ " while (!isNaN(charCode = str.charCodeAt(index++))) {\n"
+ " escape = charCode.toString(16);\n"
+ " result += '\\\\u' + ('0000' + escape).slice(-4);\n"
+ " }\n"
+ " return result;\n"
+ " }\n"

+ " function test() {\n"
+ " var request = new XMLHttpRequest();\n"
+ " request.onreadystatechange = () => {\n"
+ " if (request.readyState === 4) {\n"
+ " let txt = request.response;\n"
+ " if (txt == null) { log('null'); return; }\n"
+ " log(txt);\n"
+ " log(unicodeEscape(txt));\n"
+ " }\n"
+ " }\n"

Expand All @@ -162,7 +172,7 @@ private void responseText(
final String xml = "<?xml version=\"1.0\" " + xmlEnc + "?>"
+ "<htmlunit>"
+ "<c1>a</c1>"
+ "<c2>\u00E4</c2>"
+ "<c2>\u008A\u009A\u00E4\u00A9</c2>"
+ "<c3>\u0623\u0647\u0644\u0627\u064B</c3>"
+ "<c4>\u043C\u0438\u0440</c4>"
+ "<c5>\u623F\u95F4</c5>"
Expand All @@ -171,36 +181,41 @@ private void responseText(
String[] expected = getExpectedAlerts();
if (expected == null || expected.length == 0) {
expected = new String[] {
"<?xml version=\"1.0\" "
+ xmlEnc
+ "?><htmlunit><c1>a</c1><c2>ä</c2><c3>?????</c3><c4>???</c4><c5>??</c5></htmlunit>"};
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u0160\\u0161\\u00e4\\u00a9\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u003f\\u003f\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};

if (TestCharset.UTF8.equals(charsetXmlResponseHeader) || bom != null) {
expected = new String[] {
"<?xml version=\"1.0\" "
+ xmlEnc
+ "?><htmlunit><c1>a</c1><c2>ä</c2><c3>أهلاً</c3><c4>мир</c4><c5>房间</c5></htmlunit>"};
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u008a\\u009a\\u00e4\\u00a9\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u0623\\u0647\\u0644\\u0627\\u064b\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u043c\\u0438\\u0440\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u623f\\u95f4\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
else if (TestMimeType.EMPTY.equals(mimeTypeXml)) {
if (TestCharset.GB2312.equals(charsetXmlResponseHeader)) {
expected = new String[] {
"<?xml version=\"1.0\" "
+ xmlEnc
+ "?><htmlunit><c1>a</c1><c2>?</c2><c3>?????</c3><c4>�ާڧ�</c4><c5>����</c5></htmlunit>"};
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\ufffd\\u07a7\\u06a7\\ufffd\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\ufffd\\ufffd\\ufffd\\ufffd\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
else if (TestCharset.WINDOWS1250.equals(charsetXmlResponseHeader)) {
expected = new String[] {
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u003f\\u003f\\ufffd\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u003f\\u003f\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
else if (null == charsetXmlResponseHeader || TestCharset.ISO88591.equals(charsetXmlResponseHeader)) {
expected = new String[] {
"<?xml version=\"1.0\" "
+ xmlEnc
+ "?><htmlunit><c1>a</c1><c2>�</c2><c3>?????</c3><c4>???</c4><c5>??</c5></htmlunit>"};
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\ufffd\\ufffd\\ufffd\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u003f\\u003f\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
}
else if (TestMimeType.PLAIN.equals(mimeTypeXml) || TestMimeType.XML.equals(mimeTypeXml)) {
if (TestCharset.GB2312.equals(charsetXmlResponseHeader)) {
expected = new String[] {
"<?xml version=\"1.0\" "
+ xmlEnc
+ "?><htmlunit><c1>a</c1><c2>?</c2><c3>?????</c3><c4>мир</c4><c5>房间</c5></htmlunit>"};
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u043c\\u0438\\u0440\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u623f\\u95f4\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
else if (TestCharset.WINDOWS1250.equals(charsetXmlResponseHeader)) {
expected = new String[] {
escape("<?xml version=\"1.0\" " + xmlEnc + "?>")
+ "\\u003c\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e\\u003c\\u0063\\u0031\\u003e\\u0061\\u003c\\u002f\\u0063\\u0031\\u003e\\u003c\\u0063\\u0032\\u003e\\u003f\\u003f\\u00e4\\u00a9\\u003c\\u002f\\u0063\\u0032\\u003e\\u003c\\u0063\\u0033\\u003e\\u003f\\u003f\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0033\\u003e\\u003c\\u0063\\u0034\\u003e\\u003f\\u003f\\u003f\\u003c\\u002f\\u0063\\u0034\\u003e\\u003c\\u0063\\u0035\\u003e\\u003f\\u003f\\u003c\\u002f\\u0063\\u0035\\u003e\\u003c\\u002f\\u0068\\u0074\\u006d\\u006c\\u0075\\u006e\\u0069\\u0074\\u003e"};
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ public static Collection<Object[]> data() throws Exception {

final String[] xmlEncodingHeaders = {"", "utf8"};
final TestCharset[] charsetHtmlResponseHeaders =
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.GB2312};
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.WINDOWS1250, TestCharset.GB2312};
final TestMimeType[] mimeTypeXmls = {TestMimeType.EMPTY, TestMimeType.XML, TestMimeType.PLAIN};
final TestCharset[] charsetXmlResponseHeaders =
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.GB2312};
new TestCharset[] {null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.WINDOWS1250, TestCharset.GB2312};
final String[] boms = {null, BOM_UTF_8, BOM_UTF_16LE, BOM_UTF_16BE};

for (final Object xmlEncodingHeader : xmlEncodingHeaders) {
Expand Down Expand Up @@ -139,18 +139,28 @@ private void responseText(
+ " <head>\n"
+ " <script>\n"
+ LOG_TEXTAREA_FUNCTION

+ " function unicodeEscape(str) {\n"
+ " let result = '', index = 0, charCode, escape;\n"
+ " while (!isNaN(charCode = str.charCodeAt(index++))) {\n"
+ " escape = charCode.toString(16);\n"
+ " result += '\\\\u' + ('0000' + escape).slice(-4);\n"
+ " }\n"
+ " return result;\n"
+ " }\n"

+ " function test() {\n"
+ " var request = new XMLHttpRequest();\n"
+ " request.responseType = 'document';"
+ " request.responseType = 'document';\n"
+ " request.onreadystatechange = () => {\n"
+ " if (request.readyState === 4) {\n"
+ " let xml = request.response;\n"
+ " if (xml == null) { log('null'); return; }\n"
+ " log(xml.getElementsByTagName('c1')[0].childNodes[0].nodeValue);\n"
+ " log(xml.getElementsByTagName('c2')[0].childNodes[0].nodeValue);\n"
+ " log(xml.getElementsByTagName('c3')[0].childNodes[0].nodeValue);\n"
+ " log(xml.getElementsByTagName('c4')[0].childNodes[0].nodeValue);\n"
+ " log(xml.getElementsByTagName('c5')[0].childNodes[0].nodeValue);\n"
+ " log(unicodeEscape(xml.getElementsByTagName('c1')[0].childNodes[0].nodeValue));\n"
+ " log(unicodeEscape(xml.getElementsByTagName('c2')[0].childNodes[0].nodeValue));\n"
+ " log(unicodeEscape(xml.getElementsByTagName('c3')[0].childNodes[0].nodeValue));\n"
+ " log(unicodeEscape(xml.getElementsByTagName('c4')[0].childNodes[0].nodeValue));\n"
+ " log(unicodeEscape(xml.getElementsByTagName('c5')[0].childNodes[0].nodeValue));\n"
+ " }\n"
+ " }\n"

Expand All @@ -167,21 +177,21 @@ private void responseText(
final String xml = "<?xml version=\"1.0\" " + xmlEnc + "?>"
+ "<htmlunit>"
+ "<c1>a</c1>"
+ "<c2>\u00E4</c2>"
+ "<c2>\u008A\u009A\u00E4\u00A9</c2>"
+ "<c3>\u0623\u0647\u0644\u0627\u064B</c3>"
+ "<c4>\u043C\u0438\u0440</c4>"
+ "<c5>\u623F\u95F4</c5>"
+ "</htmlunit>";

String[] expected = getExpectedAlerts();
if (expected == null || expected.length == 0) {
expected = new String[] {"a", "�", "?????", "???", "??"};
expected = new String[] {"\\u0061","\\u0160\\u0161\\u00e4\\u00a9", "\\u003f\\u003f\\u003f\\u003f\\u003f", "\\u003f\\u003f\\u003f", "\\u003f\\u003f"};

if (TestMimeType.PLAIN.equals(mimeTypeXml)) {
expected = new String[] {"null"};
}
else if (TestCharset.UTF8.equals(charsetXmlResponseHeader) || bom != null) {
expected = new String[] {"a", "ä", "أهلاً", "мир", "房间"};
expected = new String[] {"\\u0061", "\\u008a\\u009a\\u00e4\\u00a9", "\\u0623\\u0647\\u0644\\u0627\\u064b", "\\u043c\\u0438\\u0440", "\\u623f\\u95f4"};
}
else if (TestMimeType.EMPTY.equals(mimeTypeXml)) {
/* real FF - ignored for the moment
Expand All @@ -191,24 +201,21 @@ else if (TestMimeType.EMPTY.equals(mimeTypeXml)) {
}
else */
if (TestCharset.GB2312.equals(charsetXmlResponseHeader)) {
expected = new String[] {"a", "?", "?????", "�ާڧ�", "����"};
expected = new String[] {"\\u0061", "\\u003f\\u003f\\u003f\\u003f", "\\u003f\\u003f\\u003f\\u003f\\u003f", "\\ufffd\\u07a7\\u06a7\\ufffd", "\\ufffd\\ufffd\\ufffd\\ufffd"};
}
}
else if (TestMimeType.XML.equals(mimeTypeXml)) {
if (TestCharset.GB2312.equals(charsetXmlResponseHeader)) {
expected = new String[] {"a", "?", "?????", "мир", "房间"};
else if (TestCharset.WINDOWS1250.equals(charsetXmlResponseHeader)) {
expected = new String[] {"\\u0061", "\\u003f\\u003f\\ufffd", "\\u003f\\u003f\\u003f\\u003f\\u003f", "\\u003f\\u003f\\u003f", "\\u003f\\u003f"};
}
else if (null == charsetXmlResponseHeader
|| TestCharset.ISO88591.equals(charsetXmlResponseHeader)) {
expected = new String[] {"a", "ä", "?????", "???", "??"};
else if (null == charsetXmlResponseHeader || TestCharset.ISO88591.equals(charsetXmlResponseHeader)) {
expected = new String[] {"\\u0061", "\\ufffd\\ufffd\\ufffd", "\\u003f\\u003f\\u003f\\u003f\\u003f", "\\u003f\\u003f\\u003f", "\\u003f\\u003f"};
}
}
else {
else if (TestMimeType.XML.equals(mimeTypeXml)) {
if (TestCharset.GB2312.equals(charsetXmlResponseHeader)) {
expected = new String[] {"a", "?", "?????", "мир", "房间"};
expected = new String[] {"\\u0061", "\\u003f\\u003f\\u003f\\u003f", "\\u003f\\u003f\\u003f\\u003f\\u003f", "\\u043c\\u0438\\u0440", "\\u623f\\u95f4"};
}
else if (TestCharset.ISO88591.equals(charsetXmlResponseHeader)) {
expected = new String[] {"a", "ä", "?????", "???", "??"};
else if (TestCharset.WINDOWS1250.equals(charsetXmlResponseHeader)) {
expected = new String[] {"\\u0061", "\\u003f\\u003f\\u00e4\\u00a9", "\\u003f\\u003f\\u003f\\u003f\\u003f","\\u003f\\u003f\\u003f", "\\u003f\\u003f"};
}
}
}
Expand Down
Loading

0 comments on commit 9a72f0b

Please sign in to comment.