Skip to content

Commit

Permalink
Add ErrorStrategy and ErrorListener to catch and rewrite error messag…
Browse files Browse the repository at this point in the history
…es to be more helpful (#2136)

* Add an errorHandler and an error rewriter to the compiler phase

* Some refactoring

* Add another specific case and support negative token matching

* Fix tests
  • Loading branch information
mbooth4 authored Feb 19, 2025
1 parent ea1288c commit 30123a6
Show file tree
Hide file tree
Showing 8 changed files with 346 additions and 61 deletions.
22 changes: 6 additions & 16 deletions packages/malloy/src/lang/grammar/MalloyParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ sqlString
;

sqlInterpolation
: OPEN_CODE sqExpr (closeCurly | CLOSE_CODE)
: OPEN_CODE sqExpr (CCURLY | CLOSE_CODE)
;

importStatement
Expand All @@ -77,7 +77,7 @@ importStatement
importSelect
: OCURLY
importItem (COMMA importItem)*
closeCurly FROM
CCURLY FROM
;

importItem
Expand Down Expand Up @@ -126,7 +126,7 @@ connectionId
: id;

queryProperties
: OCURLY (queryStatement | SEMI)* closeCurly
: OCURLY (queryStatement | SEMI)* CCURLY
;

queryName : id;
Expand Down Expand Up @@ -156,7 +156,7 @@ parameterNameDef: id;
sourceNameDef: id;

exploreProperties
: OCURLY (exploreStatement | SEMI)* closeCurly
: OCURLY (exploreStatement | SEMI)* CCURLY
;

exploreStatement
Expand Down Expand Up @@ -260,7 +260,7 @@ sqExpr
;

includeBlock
: OCURLY (includeItem | SEMI)* closeCurly
: OCURLY (includeItem | SEMI)* CCURLY
;

includeItem
Expand Down Expand Up @@ -350,7 +350,7 @@ filterStatement
;

fieldProperties
: OCURLY (fieldPropertyStatement | SEMI)* closeCurly
: OCURLY (fieldPropertyStatement | SEMI)* CCURLY
;

aggregateOrdering
Expand Down Expand Up @@ -723,13 +723,3 @@ debugPartial: partialAllowedFieldExpr EOF;
experimentalStatementForTesting // this only exists to enable tests for the experimental compiler flag
: SEMI SEMI OBRACK string CBRACK
;

// Try to show a nice error for a missing }. Only use this when the next
// legal symbols after the curly are things which would be illegal inside
// the curly brackets.
closeCurly
: CCURLY
// ANTLR VSCode plugin loses it's tiny mind if { } aren't matched
// even in the error string below
| { this.notifyErrorListeners("'{' missing a '}'"); }
;
46 changes: 4 additions & 42 deletions packages/malloy/src/lang/parse-malloy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
*/

import {
ANTLRErrorListener,
CharStreams,
CommonTokenStream,
ParserRuleContext,
Expand Down Expand Up @@ -51,7 +50,6 @@ import {
LogMessage,
LogMessageOptions,
MessageCode,
MessageLogger,
MessageParameterType,
makeLogMessage,
} from './parse-log';
Expand Down Expand Up @@ -92,6 +90,8 @@ import {MalloyParseInfo} from './malloy-parse-info';
import {walkForModelAnnotation} from './parse-tree-walkers/model-annotation-walker';
import {walkForTablePath} from './parse-tree-walkers/find-table-path-walker';
import {EventStream} from '../runtime_types';
import {MalloyErrorStrategy} from './syntax-errors/malloy-error-strategy';
import {MalloyParserErrorListener} from './syntax-errors/malloy-parser-error-listener';

export type StepResponses =
| DataRequestResponse
Expand Down Expand Up @@ -244,8 +244,9 @@ class ParseStep implements TranslationStep {
const malloyParser = new MalloyParser(tokenStream);
malloyParser.removeErrorListeners();
malloyParser.addErrorListener(
new MalloyParserErrorHandler(that, that.root.logger)
new MalloyParserErrorListener(that, that.root.logger)
);
malloyParser.errorHandler = new MalloyErrorStrategy();

// Admitted code smell here, testing likes to parse from an arbitrary
// node and this is the simplest way to allow that.
Expand Down Expand Up @@ -1149,45 +1150,6 @@ export interface UpdateData extends URLData, SchemaData, SQLSources, ModelData {
}
export type ParseUpdate = Partial<UpdateData>;

export class MalloyParserErrorHandler implements ANTLRErrorListener<Token> {
constructor(
readonly translator: MalloyTranslation,
readonly messages: MessageLogger
) {}

logError<T extends MessageCode>(
code: T,
parameters: MessageParameterType<T>,
options?: Omit<LogMessageOptions, 'severity'>
): T {
this.messages.log(
makeLogMessage(code, parameters, {severity: 'error', ...options})
);
return code;
}

syntaxError(
recognizer: unknown,
offendingSymbol: Token | undefined,
line: number,
charPositionInLine: number,
message: string,
_e: unknown
): void {
const errAt = {line: line - 1, character: charPositionInLine};
const range = offendingSymbol
? this.translator.rangeFromToken(offendingSymbol)
: {start: errAt, end: errAt};
this.logError(
'syntax-error',
{message},
{
at: {url: this.translator.sourceURL, range},
}
);
}
}

function flattenDependencyTree(dependencies: DependencyTree) {
return [
...Object.keys(dependencies),
Expand Down
109 changes: 109 additions & 0 deletions packages/malloy/src/lang/syntax-errors/custom-error-messages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import {Parser, Token} from 'antlr4ts';

export interface ErrorCase {
// The rule contexts in which to apply this error case.
// If no context is provided, this error case will apply to all rules.
ruleContextOptions?: string[];
// This is the symbol that triggered the error. In general, prefer to use
// this over `offendingToken` when possible.
offendingSymbol?: number;
// The value of the current token when this error rewrite should occur.
// In general, prefer to use offendingSymbol
currentToken?: number;

// The tokens preceding the offending token, in the order they occur.
// Make the token negative to match all other tokens.
precedingTokenOptions?: number[][];

// If provided, at least one of the look ahead sequences would need to match.
// Make the token negative to match all other tokens.
lookAheadOptions?: number[][];

// The error message to show to the user, instead of whatever was default
// Supports tokenization: ${currentToken}
errorMessage: string;
}

export const checkCustomErrorMessage = (
parser: Parser,
offendingSymbol: Token | undefined,
errorCases: ErrorCase[]
): string => {
const currentRuleName = parser.getRuleInvocationStack()[0];
const currentToken = parser.currentToken;

for (const errorCase of errorCases) {
// Check to see if the initial conditions match
const isCurrentTokenMatch =
!errorCase.currentToken || currentToken.type === errorCase.currentToken;
const isOffendingSymbolMatch =
!errorCase.offendingSymbol ||
offendingSymbol?.type === errorCase.offendingSymbol;
const isRuleContextMatch =
!errorCase.ruleContextOptions ||
errorCase.ruleContextOptions.includes(currentRuleName);
if (isCurrentTokenMatch && isOffendingSymbolMatch && isRuleContextMatch) {
// If so, try to check the preceding tokens.
if (errorCase.precedingTokenOptions) {
const hasPrecedingTokenMatch = errorCase.precedingTokenOptions.some(
sequence => checkTokenSequenceMatch(parser, sequence, 'lookback')
);
if (!hasPrecedingTokenMatch) {
continue; // Continue to check a different error case
}
}
if (errorCase.lookAheadOptions) {
const hasLookaheadTokenMatch = errorCase.lookAheadOptions.some(
sequence => checkTokenSequenceMatch(parser, sequence, 'lookahead')
);
if (!hasLookaheadTokenMatch) {
continue; // Continue to check a different error case
}
}

// If all cases match, return the custom error message
const message = errorCase.errorMessage.replace(
'${currentToken}',
offendingSymbol?.text || currentToken.text || ''
);
return message;
}
}

return '';
};

const checkTokenSequenceMatch = (
parser: Parser,
sequence: number[],
direction: 'lookahead' | 'lookback'
): boolean => {
try {
for (let i = 0; i < sequence.length; i++) {
// Note: positive lookahead starts at '2' because '1' is the current token.
const tokenIndex = direction === 'lookahead' ? i + 2 : -1 * (i + 1);
const streamToken = parser.inputStream.LA(tokenIndex);

// Note: negative checking is < -1 becuase Token.EOF is -1, but below
// that we use negatives to indicate "does-not-match" rules.
if (sequence[i] >= -1 && streamToken !== sequence[i]) {
return false;
}

if (sequence[i] < -1 && streamToken === -1 * sequence[i]) {
return false;
}
}
return true;
} catch (ex) {
// There may not be enough lookback tokens. If so, the case doesn't match.
return false;
}
};
50 changes: 50 additions & 0 deletions packages/malloy/src/lang/syntax-errors/malloy-error-strategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import {DefaultErrorStrategy, Parser} from 'antlr4ts';
import {MalloyParser} from '../lib/Malloy/MalloyParser';
import {checkCustomErrorMessage, ErrorCase} from './custom-error-messages';

const customErrorCases: ErrorCase[] = [
{
errorMessage: "Missing '{' after 'extend'",
currentToken: MalloyParser.EXTEND,
ruleContextOptions: ['sqExpr'],
lookAheadOptions: [[-MalloyParser.OCURLY]],
},
];

/**
* Custom error strategy for the Malloy Parser. This strategy attempts to
* detect known cases where the default error strategy results in an unhelpful
* parse tree or misleading messages. In any cases not explicitly handled, this
* custom error strategy will fall back to the default error strategy.
*
* For more details, read the documentation in DefaultErrorStrategy.d.ts
* or reference the superclass at:
* https://github.com/tunnelvisionlabs/antlr4ts/blob/master/src/DefaultErrorStrategy.ts
*/
export class MalloyErrorStrategy extends DefaultErrorStrategy {
override sync(parser: Parser) {
const interceptedErrorMessage = checkCustomErrorMessage(
parser as MalloyParser,
undefined,
customErrorCases
);
if (interceptedErrorMessage) {
parser.notifyErrorListeners(
interceptedErrorMessage,
parser.currentToken,
undefined
);

return;
}

super.sync(parser);
}
}
Loading

0 comments on commit 30123a6

Please sign in to comment.