Skip to content

Commit 0fd1e64

Browse files
authored
Switch to a stream parser infrastructure sharable with wiki process (#4)
1 parent eb8185b commit 0fd1e64

File tree

10 files changed

+272
-111
lines changed

10 files changed

+272
-111
lines changed

grammar/twelf.grammar

Lines changed: 0 additions & 24 deletions
This file was deleted.

package-lock.json

Lines changed: 2 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,19 @@
44
"description": "",
55
"main": "index.js",
66
"scripts": {
7-
"test": "echo \"Error: no test specified\" && exit 1",
8-
"build-parser": "lezer-generator grammar/twelf.grammar -o src/twelf-parser.ts"
7+
"test": "echo \"Error: no test specified\" && exit 1"
98
},
109
"keywords": [],
1110
"author": "",
1211
"license": "ISC",
1312
"dependencies": {
14-
"@lezer/lr": "^1.4.0",
1513
"@types/codemirror": "^5.60.15",
1614
"codemirror": "^6.0.1",
1715
"esbuild": "^0.20.1"
1816
},
1917
"devDependencies": {
20-
"@lezer/generator": "^1.6.0",
18+
"@codemirror/language": "^6.10.1",
19+
"@lezer/highlight": "^1.2.0",
2120
"typescript": "^5.3.3"
2221
}
2322
}

src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { syntaxHighlighting } from '@codemirror/language';
22
import { Diagnostic, lintGutter, setDiagnostics } from '@codemirror/lint';
33
import { EditorView, basicSetup } from "codemirror";
44
import { decode, encode } from "./encoding";
5-
import { twelfHighlightStyle, twelf as twelfMode } from './twelf-mode';
5+
import { twelfHighlightStyle, twelfLanguage } from './twelf-mode';
66
import { Status, TwelfError } from './twelf-worker-types';
77
import { TwelfWorker, mkTwelfWorker } from './twelf-worker';
88

@@ -31,7 +31,7 @@ function initEditor(): EditorView {
3131
const editor = new EditorView({
3232
extensions: [basicSetup,
3333
syntaxHighlighting(twelfHighlightStyle),
34-
twelfMode(),
34+
twelfLanguage,
3535
lintGutter(),
3636
// These css tweaks came from the "See this example" in
3737
// https://discuss.codemirror.net/t/fill-a-div-with-the-editor/5248/2
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { StreamParser } from "@codemirror/language";
2+
import {
3+
StreamParser as MiniStreamParser,
4+
StringStream,
5+
} from "./tokenizer-types";
6+
import { Tag } from "@lezer/highlight";
7+
8+
export function mkStreamParser<State, Tree>(
9+
parser: MiniStreamParser<State, Tree>,
10+
config: {
11+
name?: string;
12+
languageData?: any;
13+
tokenTable?: { [token: string]: Tag };
14+
} = {}
15+
): StreamParser<{ contents: State }> {
16+
let state = { contents: parser.startState };
17+
return {
18+
name: config.name,
19+
startState() {
20+
return state;
21+
},
22+
token(stream, state) {
23+
const adapter: StringStream = {
24+
eat(pattern) {
25+
const result = stream.match(pattern);
26+
if (!result) return null;
27+
if (result === true) return pattern as string;
28+
return result[0];
29+
},
30+
peek(pattern) {
31+
const fragment = stream.string.slice(stream.pos);
32+
if (typeof pattern === "string") {
33+
return fragment.startsWith(pattern) ? pattern : null;
34+
}
35+
return fragment.match(pattern)?.[0] ?? null;
36+
},
37+
sol: () => stream.sol(),
38+
eol: () => stream.eol(),
39+
matchedLocation: () => ({
40+
start: { line: 1, column: 1 },
41+
end: { line: 1, column: 2 },
42+
}),
43+
};
44+
45+
const response = parser.advance(adapter, state.contents);
46+
state.contents = response.state;
47+
return response.tag || null;
48+
},
49+
blankLine(state, indentUnit) {
50+
const adapter: StringStream = {
51+
eat: () => null,
52+
peek: () => null,
53+
sol: () => true,
54+
eol: () => true,
55+
matchedLocation: () => ({
56+
start: { line: 1, column: 1 },
57+
end: { line: 1, column: 2 },
58+
}),
59+
};
60+
61+
const response = parser.advance(adapter, state.contents);
62+
state.contents = response.state;
63+
},
64+
copyState: ({ contents }) => ({ contents }),
65+
indent: () => null,
66+
languageData: {},
67+
tokenTable: config.tokenTable,
68+
};
69+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
export interface SourcePosition {
2+
line: number; // >= 1
3+
column: number; // >= 1
4+
}
5+
6+
export interface SourceLocation {
7+
start: SourcePosition;
8+
end: SourcePosition;
9+
}
10+
11+
export interface StringStream {
12+
/** Matches a string or a regexp (which must start with ^ to only
13+
* match the start of the string) and advances the current position
14+
* if found. Returns a non-empty matched string, or null.
15+
*/
16+
eat(match: string | RegExp): string | null;
17+
18+
/** Same as eat(), but doesn't advance the current position. */
19+
peek(match: string | RegExp): string | null;
20+
21+
/** True if at the start of a line. */
22+
sol(): boolean;
23+
24+
/** True if at the end of a line. */
25+
eol(): boolean;
26+
27+
/** Returns the SourceLocation covered since the streamstring
28+
* was initialized (which, in the stream parser, always happens
29+
* immediately before advance() is called).
30+
*/
31+
matchedLocation(): SourceLocation;
32+
}
33+
34+
export type Tag = string;
35+
36+
export interface Issue {
37+
type: "Issue";
38+
msg: string;
39+
loc?: SourceLocation;
40+
}
41+
42+
export interface ParserResponse<State, Tree> {
43+
state: State;
44+
tag?: Tag;
45+
tree?: Tree;
46+
issues?: Issue[];
47+
}
48+
49+
export interface StreamParser<State, Tree> {
50+
startState: State;
51+
52+
/** Called to advance the stream state and the parser state. It is
53+
* okay to return a zero-length token, but only if the state
54+
* changes (otherwise this is definitely an infinite loop).
55+
*
56+
* Will be called exactly once on an empty line. Except in that
57+
* case, stream.eol() will never be true when this function is
58+
* initially called: if you advance to the end of the line, the
59+
* next call will be advance() on the subsequent line, or will be
60+
* handleEof().
61+
*/
62+
advance(stream: StringStream, state: State): ParserResponse<State, Tree>;
63+
64+
/** Once the end of the file is reached, this function is called
65+
* repeatedly until it returns null in order for any cleanup
66+
* needed to happen.
67+
*/
68+
handleEof(state: State): null | ParserResponse<State, Tree>;
69+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import type { ParserResponse, StreamParser } from "./tokenizer-types";
2+
3+
const IDCHARS = /^[_!&$^+/<=>?@~|#*`;,\-\\a-zA-Z0-9'\u{80}-\u{10FFFF}]+/u;
4+
5+
type State =
6+
| { type: "Toplevel" }
7+
| { type: "Eolcomment" }
8+
| { type: "Multilinecomment"; stack: number };
9+
10+
export const twelfTokenizer: StreamParser<State, null> = {
11+
startState: { type: "Toplevel" },
12+
13+
handleEof: () => null,
14+
15+
advance: (stream, state): ParserResponse<State, null> => {
16+
let tok: string | null;
17+
18+
if (stream.eol()) {
19+
// Ignore possible errors
20+
return { state };
21+
}
22+
23+
if (state.type === "Eolcomment") {
24+
if (stream.eat(/^.+/)) {
25+
return { state, tag: "comment" };
26+
}
27+
return { state };
28+
}
29+
30+
if (state.type === "Multilinecomment") {
31+
if (stream.eat("}")) {
32+
if (stream.eat("%")) {
33+
return {
34+
state:
35+
state.stack === 1
36+
? { type: "Toplevel" }
37+
: { type: "Multilinecomment", stack: state.stack - 1 },
38+
tag: "comment",
39+
};
40+
}
41+
}
42+
if (stream.eat("%")) {
43+
if (stream.eat("{")) {
44+
return { state: { ...state, stack: state.stack + 1 } };
45+
}
46+
}
47+
stream.eat(/^[^}%]*/);
48+
return { state, tag: "comment" };
49+
}
50+
51+
if (stream.eat(/^\s+/)) {
52+
return { state };
53+
}
54+
55+
if ((tok = stream.eat(/^[:.()\[\]{}]/))) {
56+
return {
57+
state,
58+
tag: "punctuation",
59+
};
60+
}
61+
62+
if (stream.eat("%")) {
63+
if (stream.eat(".")) {
64+
return { state: { type: "Eolcomment" }, tag: "comment" };
65+
}
66+
if (stream.eat("{")) {
67+
return {
68+
state: { type: "Multilinecomment", stack: 1 },
69+
tag: "comment",
70+
};
71+
}
72+
if (stream.eat(/^[\s%].*/) || stream.eol()) {
73+
return { state, tag: "comment" };
74+
}
75+
if (stream.eat(IDCHARS)) {
76+
return { state, tag: "keyword" };
77+
}
78+
stream.eat("/^./");
79+
return { state, tag: "invalid" };
80+
}
81+
82+
if (stream.eat('"')) {
83+
if (stream.eat(/^[^"]+"/)) {
84+
return { state, tag: "literal" };
85+
}
86+
stream.eat(/^.*/);
87+
return { state, tag: "invalid" };
88+
}
89+
90+
if ((tok = stream.eat(IDCHARS))) {
91+
switch (tok) {
92+
case "<-":
93+
return { state, tag: "punctuation" };
94+
case "->":
95+
return { state, tag: "punctuation" };
96+
case "_":
97+
return { state, tag: "punctuation" };
98+
case "=":
99+
return { state, tag: "punctuation" };
100+
case "type":
101+
return { state, tag: "keyword" };
102+
default:
103+
return {
104+
state,
105+
tag: tok.match(/^[A-Z_]/) ? "variableName" : "atom",
106+
};
107+
}
108+
}
109+
110+
stream.eat(/^./);
111+
return { state, tag: "invalid" };
112+
},
113+
};

0 commit comments

Comments
 (0)