diff --git a/rhino/src/main/java/org/mozilla/javascript/TokenStream.java b/rhino/src/main/java/org/mozilla/javascript/TokenStream.java
index f133ac8f31..f7676e3a06 100644
--- a/rhino/src/main/java/org/mozilla/javascript/TokenStream.java
+++ b/rhino/src/main/java/org/mozilla/javascript/TokenStream.java
@@ -1127,6 +1127,8 @@ final int getToken() throws IOException {
String str = getStringFromBuffer();
this.string = internString(str);
+ cursor = sourceCursor;
+ tokenEnd = cursor;
return Token.STRING;
}
@@ -1330,6 +1332,7 @@ && matchChar('.')) {
lookForSlash = true;
} else if (c == '/') {
if (lookForSlash) {
+ cursor = sourceCursor;
tokenEnd = cursor;
return Token.COMMENT;
}
@@ -1653,6 +1656,8 @@ int readTemplateLiteral(boolean isTaggedLiteral) throws IOException {
case '`':
rawString.setLength(rawString.length() - 1); // don't include "`"
this.string = hasInvalidEscapeSequences ? null : getStringFromBuffer();
+ cursor = sourceCursor;
+ tokenEnd = cursor;
return Token.TEMPLATE_LITERAL;
case '$':
if (matchTemplateLiteralChar('{')) {
@@ -1907,6 +1912,8 @@ int getNextXMLToken() throws IOException {
if (!xmlIsTagContent && xmlOpenTagsCount == 0) {
this.string = getStringFromBuffer();
+ cursor = sourceCursor;
+ tokenEnd = cursor;
return Token.XMLEND;
}
} else {
diff --git a/tests/src/test/java/org/mozilla/javascript/tests/ParserTest.java b/tests/src/test/java/org/mozilla/javascript/tests/ParserTest.java
index c9b6aa9480..336dcf2c9a 100644
--- a/tests/src/test/java/org/mozilla/javascript/tests/ParserTest.java
+++ b/tests/src/test/java/org/mozilla/javascript/tests/ParserTest.java
@@ -48,11 +48,15 @@
import org.mozilla.javascript.ast.StringLiteral;
import org.mozilla.javascript.ast.SwitchCase;
import org.mozilla.javascript.ast.SwitchStatement;
+import org.mozilla.javascript.ast.TemplateCharacters;
+import org.mozilla.javascript.ast.TemplateLiteral;
import org.mozilla.javascript.ast.TryStatement;
import org.mozilla.javascript.ast.UpdateExpression;
import org.mozilla.javascript.ast.VariableDeclaration;
import org.mozilla.javascript.ast.VariableInitializer;
import org.mozilla.javascript.ast.WithStatement;
+import org.mozilla.javascript.ast.XmlFragment;
+import org.mozilla.javascript.ast.XmlLiteral;
import org.mozilla.javascript.testing.TestErrorReporter;
public class ParserTest {
@@ -1203,14 +1207,52 @@ public void parseUnicodeFormatName() {
}
@Test
- public void testParseUnicodeMultibyteCharacter() {
+ public void parseUnicodeMultibyteCharacter() {
AstRoot root = parse("\uD842\uDFB7");
AstNode first = ((ExpressionStatement) root.getFirstChild()).getExpression();
assertEquals("𠮷", first.getString());
}
@Test
- public void testParseUnicodeIdentifierPartWhichIsNotJavaIdentifierPart() {
+ public void parseMultibyteCharacter_StringLiteral() {
+ AstRoot root = parse("'\uD83C\uDF1F'");
+ StringLiteral first =
+ (StringLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
+ assertEquals(4, first.getLength()); // 2 quotes + 1 surrogate pair (2 UTF-16 code units)
+ assertEquals("'🌟'", first.getValue(true));
+ }
+
+ @Test
+ public void parseMultibyteCharacter_TemplateLiteral() {
+ AstRoot root = parse("`\uD83C\uDF1F`");
+ TemplateLiteral first =
+ (TemplateLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
+ TemplateCharacters templateCharacter = (TemplateCharacters) first.getElement(0);
+ assertEquals(2, templateCharacter.getLength()); // surrogate pair only, backticks excluded
+ assertEquals("🌟", templateCharacter.getValue());
+ assertEquals(4, first.getLength()); // 2 backticks + surrogate pair (2 UTF-16 code units)
+ }
+
+ @Test
+ public void parseMultibyteCharacter_XMLLiteral() {
+ AstRoot root = parse("<xml>\uD83C\uDF1F</xml>");
+ XmlLiteral first =
+ (XmlLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
+ XmlFragment fragment = first.getFragments().get(0);
+ assertEquals(13, fragment.getLength()); // "<xml>" (5) + surrogate pair (2) + "</xml>" (6)
+ assertEquals("<xml>🌟</xml>", fragment.toSource());
+ }
+
+ @Test
+ public void parseMultibyteCharacter_Comment() {
+ AstRoot root = parse("/*\uD83C\uDF1F*/");
+ Comment comment = root.getComments().first();
+ assertEquals(6, comment.getLength()); // "/*" (2) + surrogate pair (2) + "*/" (2)
+ assertEquals("/*🌟*/", comment.getValue());
+ }
+
+ @Test
+ public void parseUnicodeIdentifierPartWhichIsNotJavaIdentifierPart() {
// On the JDK 11 I'm using, Character.isUnicodeIdentifierPart(U+9FEB) returns true
// but Character.isJavaIdentifierPart(U+9FEB) returns false. On a JDK 17 results
// seem to vary, but I think it's enough to verify that TokenStream uses