From 9f673f2fe6bc01a9fc4f09093cd0e772998338d6 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sun, 4 Dec 2022 16:32:56 +0000 Subject: [PATCH] Fixed `when` tag expression parsing. --- CHANGELOG.md | 4 + src/builtin/tags/case.ts | 76 ++++- src/expressions/common.ts | 10 +- src/expressions/index.ts | 1 + src/expressions/standard/index.ts | 2 + src/expressions/standard/lex.ts | 345 +++++++++++++++++++++ src/expressions/standard/parse.ts | 101 ++++++ tests/golden/golden_liquid.json | 86 ++++- tests/parse_common_expression.test.ts | 179 +++++++++++ tests/tokenize_common_expression.test.ts | 86 +++++ tests/tokenize_filtered_expression.test.ts | 6 + 11 files changed, 874 insertions(+), 22 deletions(-) create mode 100644 src/expressions/standard/index.ts create mode 100644 src/expressions/standard/lex.ts create mode 100644 src/expressions/standard/parse.ts create mode 100644 tests/parse_common_expression.test.ts create mode 100644 tests/tokenize_common_expression.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a280831..7ad47b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Version 1.5.0 (unreleased) +**Fixes** + +- Fixed `case`/`when` tag expression parsing. `when` expressions no longer fail when presented with a string containing a comma. Handling of comma and `or` separated "sub-expressions" is now consistent with the reference implementation. + **Compatibility** - `for` tag arguments can now be separated by commas as well as whitespace. See [Shopify/liquid#1658](https://github.com/Shopify/liquid/pull/1658). diff --git a/src/builtin/tags/case.ts b/src/builtin/tags/case.ts index 28953960..877073f1 100644 --- a/src/builtin/tags/case.ts +++ b/src/builtin/tags/case.ts @@ -1,8 +1,12 @@ import { BlockNode, forcedOutput, Node, ChildNode } from "../../ast"; import { RenderContext } from "../../context"; import { Environment } from "../../environment"; -import { BooleanExpression, Literal } from "../../expression"; -import { parse } from "../../expressions/boolean/parse"; +import { + BooleanExpression, + Expression, + InfixExpression, + Literal, +} from "../../expression"; import { RenderStream } from "../../io/output_stream"; import { Tag } from "../../tag"; import { @@ -13,6 +17,14 @@ import { TOKEN_TAG, } from "../../token"; +import { + TOKEN_COMMA, + TOKEN_OR, + ExpressionTokenStream, +} from "../../expressions"; + +import { tokenize, parse } from "../../expressions/standard"; + const TAG_CASE = "case"; const TAG_ENDCASE = "endcase"; const TAG_WHEN = "when"; @@ -31,27 +43,26 @@ export class CaseTag implements Tag { TAG_ELSE, TOKEN_EOF, ]); + protected static END_CASE_BLOCK = new Set([TAG_ENDCASE]); + protected static DELIM_TOKENS = new Set([TOKEN_COMMA, TOKEN_OR]); readonly block = true; readonly name: string = TAG_CASE; readonly end: string = TAG_ENDCASE; protected nodeClass = CaseNode; - protected parseExpression( - _when: string, - obj: string, - stream: TokenStream - ): BooleanExpression { - stream.expect(TOKEN_EXPRESSION); - return parse(`${_when} == ${obj}`); - } - public parse(stream: TokenStream, environment: Environment): Node { const parser = environment.parser; const token = stream.next(); + + // Parse the case expression stream.expect(TOKEN_EXPRESSION); - const _case = stream.next().value; + const _case = this.parse_case_expression( + stream.current.value, + stream.current.index + ); + stream.next(); // Eat whitespace or junk between `case` and when/else/endcase while ( @@ -66,10 +77,14 @@ export class CaseTag implements Tag { stream.current.value === TAG_WHEN ) { const whenToken = stream.next(); - // One conditional block for every object in a comma separated list. - const whenExprs = stream.current.value - .split(",") - .map((expr) => this.parseExpression(_case, expr, stream)); + stream.expect(TOKEN_EXPRESSION); + + const whenExprs = this.parse_when_expression( + stream.current.value, + stream.current.index + ).map( + (expr) => new BooleanExpression(new InfixExpression(_case, "==", expr)) + ); const whenBlock = parser.parseBlock( stream, @@ -96,8 +111,37 @@ export class CaseTag implements Tag { parser.parseBlock(stream, CaseTag.END_CASE_BLOCK, stream.next()) ); } + + stream.expectTag(TAG_ENDCASE); return new this.nodeClass(token, whens); } + + protected parse_case_expression( + expr: string, + startIndex: number + ): Expression { + return parse(new ExpressionTokenStream(tokenize(expr, startIndex))); + } + + protected parse_when_expression( + expr: string, + startIndex: number + ): Expression[] { + const expressions: Expression[] = []; + const stream = new ExpressionTokenStream(tokenize(expr, startIndex)); + + for (;;) { + expressions.push(parse(stream)); + stream.next(); + if (CaseTag.DELIM_TOKENS.has(stream.current.kind)) { + stream.next(); + } else { + break; + } + } + + return expressions; + } } export class CaseNode implements Node { diff --git a/src/expressions/common.ts b/src/expressions/common.ts index e5dde90b..4951e556 100644 --- a/src/expressions/common.ts +++ b/src/expressions/common.ts @@ -45,6 +45,11 @@ const enum MatchGroup { IDENT_QUOTED = "identQuoted", } +export type Tokenizer = ( + expression: string, + startIndex?: number +) => Generator; + // Optional trailing question mark. export const IDENTIFIER_PATTERN = "[a-zA-Z_][\\w\\-]*\\??"; @@ -232,8 +237,3 @@ export function makeParseRange( } return _parseRangeLiteral; } - -export type Tokenizer = ( - expression: string, - startIndex?: number -) => Generator; diff --git a/src/expressions/index.ts b/src/expressions/index.ts index c9f21be7..da8930ff 100644 --- a/src/expressions/index.ts +++ b/src/expressions/index.ts @@ -6,3 +6,4 @@ export * as include from "./include"; export * as loop from "./loop"; export * as boolean_not from "./boolean_not"; export * as arguments from "./arguments"; +export * as standard from "./standard"; diff --git a/src/expressions/standard/index.ts b/src/expressions/standard/index.ts new file mode 100644 index 00000000..0b4ab9ba --- /dev/null +++ b/src/expressions/standard/index.ts @@ -0,0 +1,2 @@ +export * from "./lex"; +export * from "./parse"; diff --git a/src/expressions/standard/lex.ts b/src/expressions/standard/lex.ts new file mode 100644 index 00000000..2e81720f --- /dev/null +++ b/src/expressions/standard/lex.ts @@ -0,0 +1,345 @@ +import { LiquidSyntaxError } from "../../errors"; +import { Token } from "../../token"; + +import { + IDENTIFIER_PATTERN, + IDENT_INDEX_PATTERN, + IDENT_STRING_PATTERN, + STRING_PATTERN, + Tokenizer, +} from "../common"; + +import { + TOKEN_DOT, + TOKEN_FLOAT, + TOKEN_IDENT, + TOKEN_IDENT_INDEX, + TOKEN_IDENT_STRING, + TOKEN_INTEGER, + TOKEN_LBRACKET, + TOKEN_LPAREN, + TOKEN_RANGE, + TOKEN_RBRACKET, + TOKEN_RPAREN, + TOKEN_STRING, + TOKEN_TRUE, + TOKEN_NEWLINE, + TOKEN_SKIP, + TOKEN_ILLEGAL, + TOKEN_FALSE, + TOKEN_NIL, + TOKEN_NULL, + TOKEN_EMPTY, + TOKEN_BLANK, + TOKEN_OR, + TOKEN_COMMA, +} from "../tokens"; + +const RULES = [ + [TOKEN_IDENT_INDEX, IDENT_INDEX_PATTERN], + [TOKEN_IDENT_STRING, IDENT_STRING_PATTERN], + [TOKEN_STRING, STRING_PATTERN], + [TOKEN_RANGE, "\\.\\."], + [TOKEN_FLOAT, "-?\\d+\\.(?!\\.)\\d*"], + [TOKEN_INTEGER, "-?\\d+\\b"], + [TOKEN_DOT, "\\."], + [TOKEN_IDENT, IDENTIFIER_PATTERN], + [TOKEN_LPAREN, "\\("], + [TOKEN_RPAREN, "\\)"], + [TOKEN_LBRACKET, "\\["], + [TOKEN_RBRACKET, "]"], + [TOKEN_COMMA, ","], + [TOKEN_NEWLINE, "\\n"], + [TOKEN_SKIP, "[ \\t\\r]+"], + [TOKEN_ILLEGAL, "."], +]; + +const KEYWORDS = new Set([ + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_NIL, + TOKEN_NULL, + TOKEN_EMPTY, + TOKEN_BLANK, + TOKEN_OR, +]); + +export const RE = new RegExp( + RULES.map(([n, p]) => `(?<${n}>${p})`).join("|"), + "gs" +); + +interface IdentIndexMatch { + TOKEN_IDENT_INDEX: string; + identIndex: string; +} + +interface IdentStringMatch { + TOKEN_IDENT_STRING: string; + identQuoted: string; +} + +interface StringMatch { + TOKEN_STRING: string; + quoted: string; +} + +interface RangeMatch { + TOKEN_RANGE: string; +} + +interface FloatMatch { + TOKEN_FLOAT: string; +} + +interface IntegerMatch { + TOKEN_INTEGER: string; +} + +interface DotMatch { + TOKEN_DOT: string; +} + +interface IdentifierMatch { + TOKEN_IDENT: string; +} + +interface LParenMatch { + TOKEN_LPAREN: string; +} + +interface RParenMatch { + TOKEN_RPAREN: string; +} + +interface LBracketMatch { + TOKEN_LBRACKET: string; +} + +interface RBracketMatch { + TOKEN_RBRACKET: string; +} + +interface CommaMatch { + TOKEN_COMMA: string; +} + +interface NewlineMatch { + TOKEN_NEWLINE: string; +} + +interface SkipMatch { + TOKEN_SKIP: string; +} + +interface IllegalMatch { + TOKEN_ILLEGAL: string; +} + +type MatchGroups = Readonly< + Partial< + IdentIndexMatch & + IdentStringMatch & + StringMatch & + RangeMatch & + FloatMatch & + IntegerMatch & + DotMatch & + IdentifierMatch & + LParenMatch & + RParenMatch & + LBracketMatch & + RBracketMatch & + CommaMatch & + NewlineMatch & + SkipMatch & + IllegalMatch + > +>; + +function isIdentIndexMatch(match: MatchGroups): match is IdentIndexMatch { + return match.TOKEN_IDENT_INDEX === undefined ? false : true; +} + +function isIdentStringMatch(match: MatchGroups): match is IdentStringMatch { + return match.TOKEN_IDENT_STRING === undefined ? false : true; +} + +function isStringMatch(match: MatchGroups): match is StringMatch { + return match.TOKEN_STRING === undefined ? false : true; +} + +function isRangeMatch(match: MatchGroups): match is RangeMatch { + return match.TOKEN_RANGE === undefined ? false : true; +} + +function isFloatMatch(match: MatchGroups): match is FloatMatch { + return match.TOKEN_FLOAT === undefined ? false : true; +} + +function isIntegerMatch(match: MatchGroups): match is IntegerMatch { + return match.TOKEN_INTEGER === undefined ? false : true; +} + +function isDotMatch(match: MatchGroups): match is DotMatch { + return match.TOKEN_DOT === undefined ? false : true; +} + +function isIdentifierMatch(match: MatchGroups): match is IdentifierMatch { + return match.TOKEN_IDENT === undefined ? false : true; +} + +function isLParenMatch(match: MatchGroups): match is LParenMatch { + return match.TOKEN_LPAREN === undefined ? false : true; +} + +function isRParenMatch(match: MatchGroups): match is RParenMatch { + return match.TOKEN_RPAREN === undefined ? false : true; +} + +function isLBracketMatch(match: MatchGroups): match is LBracketMatch { + return match.TOKEN_LBRACKET === undefined ? false : true; +} + +function isRBracketMatch(match: MatchGroups): match is RBracketMatch { + return match.TOKEN_RBRACKET === undefined ? false : true; +} + +function isCommaMatch(match: MatchGroups): match is CommaMatch { + return match.TOKEN_COMMA === undefined ? false : true; +} + +function isNewlineMatch(match: MatchGroups): match is NewlineMatch { + return match.TOKEN_NEWLINE === undefined ? false : true; +} + +function isSkipMatch(match: MatchGroups): match is SkipMatch { + return match.TOKEN_SKIP === undefined ? false : true; +} + +function isIllegalMatch(match: MatchGroups): match is IllegalMatch { + return match.TOKEN_ILLEGAL === undefined ? false : true; +} + +export function makeTokenizer(re: RegExp, keywords: Set): Tokenizer { + return function* tokenize( + source: string, + startIndex: number = 0 + ): Generator { + for (const match of source.matchAll(re)) { + const groups = match.groups as MatchGroups; + if (isIdentifierMatch(groups)) { + if (keywords.has(groups.TOKEN_IDENT)) + yield new Token( + groups.TOKEN_IDENT, + match[0], + match.index + startIndex, + source + ); + else + yield new Token( + TOKEN_IDENT, + groups.TOKEN_IDENT, + match.index + startIndex, + source + ); + } else if (isIdentIndexMatch(groups)) + yield new Token( + TOKEN_IDENT_INDEX, + groups.identIndex, + match.index + startIndex, + source + ); + else if (isIdentStringMatch(groups)) + yield new Token( + TOKEN_IDENT, + groups.identQuoted, + match.index + startIndex, + source + ); + else if (isStringMatch(groups)) + yield new Token( + TOKEN_STRING, + groups.quoted, + match.index + startIndex, + source + ); + else if (isNewlineMatch(groups) || isSkipMatch(groups)) continue; + else if (isRangeMatch(groups)) + yield new Token( + TOKEN_RANGE, + groups.TOKEN_RANGE, + match.index + startIndex, + source + ); + else if (isFloatMatch(groups)) + yield new Token( + TOKEN_FLOAT, + groups.TOKEN_FLOAT, + match.index + startIndex, + source + ); + else if (isIntegerMatch(groups)) + yield new Token( + TOKEN_INTEGER, + groups.TOKEN_INTEGER, + match.index + startIndex, + source + ); + else if (isDotMatch(groups)) + yield new Token( + TOKEN_DOT, + groups.TOKEN_DOT, + match.index + startIndex, + source + ); + else if (isLParenMatch(groups)) + yield new Token( + TOKEN_LPAREN, + groups.TOKEN_LPAREN, + match.index + startIndex, + source + ); + else if (isRParenMatch(groups)) + yield new Token( + TOKEN_RPAREN, + groups.TOKEN_RPAREN, + match.index + startIndex, + source + ); + else if (isLBracketMatch(groups)) + yield new Token( + TOKEN_LBRACKET, + groups.TOKEN_LBRACKET, + match.index + startIndex, + source + ); + else if (isRBracketMatch(groups)) + yield new Token( + TOKEN_RBRACKET, + groups.TOKEN_RBRACKET, + match.index + startIndex, + source + ); + else if (isCommaMatch(groups)) + yield new Token( + TOKEN_COMMA, + groups.TOKEN_COMMA, + match.index + startIndex, + source + ); + else if (isIllegalMatch(groups)) + throw new LiquidSyntaxError( + `unexpected token '${groups.TOKEN_ILLEGAL}'`, + new Token( + TOKEN_ILLEGAL, + groups.TOKEN_ILLEGAL, + match.index + startIndex, + source + ) + ); + } + }; +} + +export const tokenize = makeTokenizer(RE, KEYWORDS); diff --git a/src/expressions/standard/parse.ts b/src/expressions/standard/parse.ts new file mode 100644 index 00000000..8e8f5448 --- /dev/null +++ b/src/expressions/standard/parse.ts @@ -0,0 +1,101 @@ +import { + makeParseRange, + parseBlank, + parseBoolean, + parseEmpty, + parseFloatLiteral, + parseIntegerLiteral, + parseNil, + parseStringLiteral, + IDENT_TOKENS, +} from "../common"; + +import { LiquidSyntaxError } from "../../errors"; +import { + Expression, + Identifier, + IdentifierPath, + IdentifierPathElement, +} from "../../expression"; + +import { + ExpressionTokenStream, + TOKEN_FLOAT, + TOKEN_IDENT, + TOKEN_INTEGER, + TOKEN_LPAREN, + TOKEN_STRING, + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_NIL, + TOKEN_EMPTY, + TOKEN_BLANK, + TOKEN_IDENT_INDEX, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_DOT, +} from "../tokens"; + +type parseFunc = (stream: ExpressionTokenStream) => Expression; + +function parseIdentifier(stream: ExpressionTokenStream): Identifier { + stream.expect(TOKEN_IDENT); + + if (!IDENT_TOKENS.has(stream.peek.kind)) { + return new Identifier(stream.current.value, []); + } + + const root = stream.next().value; + const path: IdentifierPath = []; + + for (;;) { + switch (stream.current.kind) { + case TOKEN_IDENT: + path.push(new IdentifierPathElement(stream.current.value)); + break; + case TOKEN_IDENT_INDEX: + path.push(new IdentifierPathElement(Number(stream.current.value))); + break; + case TOKEN_LBRACKET: + stream.next(); + path.push(parseIdentifier(stream)); + stream.next(); + stream.expect(TOKEN_RBRACKET); + break; + case TOKEN_DOT: + break; + } + + if (IDENT_TOKENS.has(stream.peek.kind)) { + stream.next(); + } else { + break; + } + } + + return new Identifier(root, path); +} + +const LITERAL_OR_IDENT_MAP = new Map([ + [TOKEN_IDENT, parseIdentifier], + [TOKEN_STRING, parseStringLiteral], + [TOKEN_INTEGER, parseIntegerLiteral], + [TOKEN_FLOAT, parseFloatLiteral], + [TOKEN_NIL, parseNil], + [TOKEN_TRUE, parseBoolean], + [TOKEN_FALSE, parseBoolean], + [TOKEN_BLANK, parseBlank], + [TOKEN_EMPTY, parseEmpty], + [TOKEN_LPAREN, makeParseRange(parse)], +]); + +export function parse(stream: ExpressionTokenStream): Expression { + const func = LITERAL_OR_IDENT_MAP.get(stream.current.kind); + if (!func) + throw new LiquidSyntaxError( + `unexpected '${stream.current.value}'`, + stream.current + ); + + return func(stream); +} diff --git a/tests/golden/golden_liquid.json b/tests/golden/golden_liquid.json index 70a53419..edb927c8 100644 --- a/tests/golden/golden_liquid.json +++ b/tests/golden/golden_liquid.json @@ -1,5 +1,5 @@ { - "version": "0.12.0", + "version": "0.13.0", "test_groups": [ { "name": "liquid.golden.abs_filter", @@ -797,6 +797,28 @@ "error": false, "strict": false }, + { + "name": "comma string literal", + "template": "{% case foo %}{% when 'foo' %}bar{% when ',' %}comma{% endcase %}", + "want": "comma", + "context": { + "foo": "," + }, + "partials": {}, + "error": false, + "strict": false + }, + { + "name": "empty when tag", + "template": "{% case foo %}{% when %}bar{% endcase %}", + "want": "", + "context": { + "foo": "bar" + }, + "partials": {}, + "error": true, + "strict": false + }, { "name": "evaluate multiple matching blocks", "template": "{% case title %}{% when 'Hello' %}foo{% when a, 'Hello' %}bar{% endcase %}", @@ -809,6 +831,28 @@ "error": false, "strict": false }, + { + "name": "mix or and comma separated when expression", + "template": "{% case title %}{% when 'foo' %}foo{% when 'bar' or 'Hello', 'Hello' %}bar{% endcase %}", + "want": "barbar", + "context": { + "title": "Hello" + }, + "partials": {}, + "error": false, + "strict": false + }, + { + "name": "mix or and comma separated when expression", + "template": "{% case title %}{% when 'foo' %}foo{% when 'bar' or 'Hello', 'Hello' %}bar{% endcase %}", + "want": "barbar", + "context": { + "title": "Hello" + }, + "partials": {}, + "error": false, + "strict": false + }, { "name": "name not in scope", "template": "{% case nosuchthing %}{% when 'foo' %}foo{% when 'bar' %}bar{% endcase %}", @@ -851,6 +895,17 @@ "error": false, "strict": false }, + { + "name": "or separated when expression", + "template": "{% case title %}{% when 'foo' %}foo{% when 'bar' or 'Hello' %}bar{% endcase %}", + "want": "bar", + "context": { + "title": "Hello" + }, + "partials": {}, + "error": false, + "strict": false + }, { "name": "simple case/when", "template": "{% case title %}{% when 'foo' %}foo{% when 'Hello' %}bar{% endcase %}", @@ -874,6 +929,17 @@ "error": false, "strict": false }, + { + "name": "unexpected when token", + "template": "{% case title %}{% when 'foo' %}foo{% when 'bar' and 'Hello', 'Hello' %}bar{% endcase %}", + "want": "", + "context": { + "title": "Hello" + }, + "partials": {}, + "error": false, + "strict": false + }, { "name": "whitespace", "template": "{% case title %} \n\t{% when 'foo' %}foo\n{% when 'Hello' %}bar{% endcase %}", @@ -2299,6 +2365,15 @@ "error": false, "strict": false }, + { + "name": "comma separated arguments", + "template": "{% for i in (1..6), limit: 4, offset: 2 %}{{ i }} {% endfor %}", + "want": "3 4 5 6 ", + "context": {}, + "partials": {}, + "error": false, + "strict": false + }, { "name": "continue", "template": "{% for tag in product.tags %}{% if tag == 'sports' %}{% continue %}{% else %}{{ tag }} {% endif %}{% else %}no images{% endfor %}", @@ -2951,6 +3026,15 @@ "partials": {}, "error": false, "strict": false + }, + { + "name": "some comma separated arguments", + "template": "{% for i in (1..6) limit: 4, offset: 2, %}{{ i }} {% endfor %}", + "want": "3 4 5 6 ", + "context": {}, + "partials": {}, + "error": false, + "strict": false } ] }, diff --git a/tests/parse_common_expression.test.ts b/tests/parse_common_expression.test.ts new file mode 100644 index 00000000..85903170 --- /dev/null +++ b/tests/parse_common_expression.test.ts @@ -0,0 +1,179 @@ +import { tokenize } from "../src/expressions/standard"; +import { parse } from "../src/expressions/standard"; + +import { + ExpressionTokenStream, + TOKEN_IN, + TOKEN_LPAREN, +} from "../src/expressions/tokens"; + +import { + FloatLiteral, + Identifier, + IdentifierPathElement, + IntegerLiteral, + RangeLiteral, + StringLiteral, + BLANK, + EMPTY, + TRUE, + FALSE, + NIL, +} from "../src/expression"; + +import { + TOKEN_BLANK, + TOKEN_EMPTY, + TOKEN_FALSE, + TOKEN_TRUE, + TOKEN_FLOAT, + TOKEN_INTEGER, + TOKEN_NIL, + TOKEN_RPAREN, + TOKEN_STRING, + TOKEN_IDENT, + TOKEN_IDENT_INDEX, + TOKEN_RBRACKET, +} from "../src/expressions/tokens"; + +import { Float, Integer } from "../src/number"; +import { Token } from "../src/token"; + +describe("parse common liquid expressions", () => { + test("parse literal integer", () => { + const expr = "42"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(new IntegerLiteral(new Integer(42))); + expect(stream.current).toStrictEqual( + new Token(TOKEN_INTEGER, "42", 0, expr) + ); + }); + test("parse literal float", () => { + const expr = "42.2"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(new FloatLiteral(new Float(42.2))); + expect(stream.current).toStrictEqual( + new Token(TOKEN_FLOAT, "42.2", 0, expr) + ); + }); + test("parse literal string", () => { + const expr = '"42.2"'; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(new StringLiteral("42.2")); + expect(stream.current).toStrictEqual( + new Token(TOKEN_STRING, "42.2", 0, expr) + ); + }); + test("parse literal range", () => { + const expr = "(1..5)"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual( + new RangeLiteral( + new IntegerLiteral(new Integer(1)), + new IntegerLiteral(new Integer(5)) + ) + ); + expect(stream.current).toStrictEqual(new Token(TOKEN_RPAREN, ")", 5, expr)); + }); + test("parse blank", () => { + const expr = "blank"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(BLANK); + expect(stream.current).toStrictEqual( + new Token(TOKEN_BLANK, "blank", 0, expr) + ); + }); + test("parse empty", () => { + const expr = "empty"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(EMPTY); + expect(stream.current).toStrictEqual( + new Token(TOKEN_EMPTY, "empty", 0, expr) + ); + }); + test("parse nil", () => { + const expr = "nil"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(NIL); + expect(stream.current).toStrictEqual(new Token(TOKEN_NIL, "nil", 0, expr)); + }); + test("parse true", () => { + const expr = "true"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(TRUE); + expect(stream.current).toStrictEqual( + new Token(TOKEN_TRUE, "true", 0, expr) + ); + }); + test("parse false", () => { + const expr = "false"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(FALSE); + expect(stream.current).toStrictEqual( + new Token(TOKEN_FALSE, "false", 0, expr) + ); + }); + test("parse identifier", () => { + const expr = "a.b[1].c['foo']"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual( + new Identifier("a", [ + new IdentifierPathElement("b"), + new IdentifierPathElement(1), + new IdentifierPathElement("c"), + new IdentifierPathElement("foo"), + ]) + ); + expect(stream.current).toStrictEqual( + new Token(TOKEN_IDENT, "foo", 8, expr) + ); + }); + test("parse nested identifiers", () => { + const expr = "a.b[c.d[1]]"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual( + new Identifier("a", [ + new IdentifierPathElement("b"), + new Identifier("c", [ + new IdentifierPathElement("d"), + new IdentifierPathElement(1), + ]), + ]) + ); + expect(stream.current).toStrictEqual( + new Token(TOKEN_RBRACKET, "]", 10, expr) + ); + }); + test("parse identifier followed by a comma", () => { + const expr = "a.b[1],"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual( + new Identifier("a", [ + new IdentifierPathElement("b"), + new IdentifierPathElement(1), + ]) + ); + expect(stream.current).toStrictEqual( + new Token(TOKEN_IDENT_INDEX, "1", 3, expr) + ); + }); + test("parse up to comma", () => { + const expr = "a, 'Hello'"; + const stream = new ExpressionTokenStream(tokenize(expr)); + const result = parse(stream); + expect(result).toStrictEqual(new Identifier("a", [])); + expect(stream.current).toStrictEqual(new Token(TOKEN_IDENT, "a", 0, expr)); + }); +}); diff --git a/tests/tokenize_common_expression.test.ts b/tests/tokenize_common_expression.test.ts new file mode 100644 index 00000000..26fd6994 --- /dev/null +++ b/tests/tokenize_common_expression.test.ts @@ -0,0 +1,86 @@ +import { Token } from "../src/token"; +import { tokenize } from "../src/expressions/standard"; + +import { + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_DOT, + TOKEN_FLOAT, + TOKEN_IDENT, + TOKEN_IDENT_INDEX, + TOKEN_INTEGER, + TOKEN_LBRACKET, + TOKEN_LPAREN, + TOKEN_PIPE, + TOKEN_RANGE, + TOKEN_RBRACKET, + TOKEN_RPAREN, + TOKEN_STRING, +} from "../src/expressions/tokens"; + +describe("tokenize filtered expressions", () => { + test("double quoted string literal", () => { + const tokens = Array.from(tokenize('"hello"')); + expect(tokens).toStrictEqual([ + new Token(TOKEN_STRING, "hello", 0, '"hello"'), + ]); + }); + test("single quoted string literal", () => { + const tokens = Array.from(tokenize("'hello'")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_STRING, "hello", 0, "'hello'"), + ]); + }); + test("single quoted string representation of a float", () => { + const tokens = Array.from(tokenize("'42.2'")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_STRING, "42.2", 0, "'42.2'"), + ]); + }); + test("integer literal", () => { + const tokens = Array.from(tokenize("42")); + expect(tokens).toStrictEqual([new Token(TOKEN_INTEGER, "42", 0, "42")]); + }); + test("negative integer literal", () => { + const tokens = Array.from(tokenize("-42")); + expect(tokens).toStrictEqual([new Token(TOKEN_INTEGER, "-42", 0, "-42")]); + }); + test("float literal", () => { + const tokens = Array.from(tokenize("1.34")); + expect(tokens).toStrictEqual([new Token(TOKEN_FLOAT, "1.34", 0, "1.34")]); + }); + test("negative float literal", () => { + const tokens = Array.from(tokenize("-1.34")); + expect(tokens).toStrictEqual([new Token(TOKEN_FLOAT, "-1.34", 0, "-1.34")]); + }); + test("range literal", () => { + const tokens = Array.from(tokenize("(1..5)")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_LPAREN, "(", 0, "(1..5)"), + new Token(TOKEN_INTEGER, "1", 1, "(1..5)"), + new Token(TOKEN_RANGE, "..", 2, "(1..5)"), + new Token(TOKEN_INTEGER, "5", 4, "(1..5)"), + new Token(TOKEN_RPAREN, ")", 5, "(1..5)"), + ]); + }); + test("range literal with float start", () => { + const tokens = Array.from(tokenize("(2.4..5)")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_LPAREN, "(", 0, "(2.4..5)"), + new Token(TOKEN_FLOAT, "2.4", 1, "(2.4..5)"), + new Token(TOKEN_RANGE, "..", 4, "(2.4..5)"), + new Token(TOKEN_INTEGER, "5", 6, "(2.4..5)"), + new Token(TOKEN_RPAREN, ")", 7, "(2.4..5)"), + ]); + }); + test("range literal with identifiers for start and stop", () => { + const tokens = Array.from(tokenize("(a..b)")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_LPAREN, "(", 0, "(a..b)"), + new Token(TOKEN_IDENT, "a", 1, "(a..b)"), + new Token(TOKEN_RANGE, "..", 2, "(a..b)"), + new Token(TOKEN_IDENT, "b", 4, "(a..b)"), + new Token(TOKEN_RPAREN, ")", 5, "(a..b)"), + ]); + }); +}); diff --git a/tests/tokenize_filtered_expression.test.ts b/tests/tokenize_filtered_expression.test.ts index 5c171d31..4042d0ee 100644 --- a/tests/tokenize_filtered_expression.test.ts +++ b/tests/tokenize_filtered_expression.test.ts @@ -31,6 +31,12 @@ describe("tokenize filtered expressions", () => { new Token(TOKEN_STRING, "hello", 0, "'hello'"), ]); }); + test("single quoted string representation of a float", () => { + const tokens = Array.from(tokenize("'42.2'")); + expect(tokens).toStrictEqual([ + new Token(TOKEN_STRING, "42.2", 0, "'42.2'"), + ]); + }); test("integer literal", () => { const tokens = Array.from(tokenize("42")); expect(tokens).toStrictEqual([new Token(TOKEN_INTEGER, "42", 0, "42")]);