jcreedcmu
diff --git a/‎grammar/twelf.grammar
Lines changed: 0 additions & 24 deletions b/‎grammar/twelf.grammar
Lines changed: 0 additions & 24 deletions
diff --git a/‎package-lock.json
Lines changed: 2 additions & 25 deletions b/‎package-lock.json
Lines changed: 2 additions & 25 deletions
diff --git a/‎package.json
Lines changed: 3 additions & 4 deletions b/‎package.json
Lines changed: 3 additions & 4 deletions
diff --git a/‎src/index.ts
Lines changed: 2 additions & 2 deletions b/‎src/index.ts
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/twelf-highlight/tokenizer-codemirror-adapter.ts
Lines changed: 69 additions & 0 deletions b/‎src/twelf-highlight/tokenizer-codemirror-adapter.ts
Lines changed: 69 additions & 0 deletions
diff --git a/‎src/twelf-highlight/tokenizer-types.ts
Lines changed: 69 additions & 0 deletions b/‎src/twelf-highlight/tokenizer-types.ts
Lines changed: 69 additions & 0 deletions
diff --git a/‎src/twelf-highlight/twelf-tokenizer.ts
Lines changed: 113 additions & 0 deletions b/‎src/twelf-highlight/twelf-tokenizer.ts
Lines changed: 113 additions & 0 deletions
@@ -4,20 +4,19 @@
   "description": "",
   "main": "index.js",
   "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1",
-    "build-parser": "lezer-generator grammar/twelf.grammar -o src/twelf-parser.ts"
+    "test": "echo \"Error: no test specified\" && exit 1"
   },
   "keywords": [],
   "author": "",
   "license": "ISC",
   "dependencies": {
-    "@lezer/lr": "^1.4.0",
     "@types/codemirror": "^5.60.15",
     "codemirror": "^6.0.1",
     "esbuild": "^0.20.1"
   },
   "devDependencies": {
-    "@lezer/generator": "^1.6.0",
+    "@codemirror/language": "^6.10.1",
+    "@lezer/highlight": "^1.2.0",
     "typescript": "^5.3.3"
   }
 }
@@ -2,7 +2,7 @@ import { syntaxHighlighting } from '@codemirror/language';
 import { Diagnostic, lintGutter, setDiagnostics } from '@codemirror/lint';
 import { EditorView, basicSetup } from "codemirror";
 import { decode, encode } from "./encoding";
-import { twelfHighlightStyle, twelf as twelfMode } from './twelf-mode';
+import { twelfHighlightStyle, twelfLanguage } from './twelf-mode';
 import { Status, TwelfError } from './twelf-worker-types';
 import { TwelfWorker, mkTwelfWorker } from './twelf-worker';
 
@@ -31,7 +31,7 @@ function initEditor(): EditorView {
   const editor = new EditorView({
     extensions: [basicSetup,
       syntaxHighlighting(twelfHighlightStyle),
-      twelfMode(),
+      twelfLanguage,
       lintGutter(),
       // These css tweaks came from the "See this example" in
       // https://discuss.codemirror.net/t/fill-a-div-with-the-editor/5248/2
 
@@ -0,0 +1,69 @@
+import { StreamParser } from "@codemirror/language";
+import {
+  StreamParser as MiniStreamParser,
+  StringStream,
+} from "./tokenizer-types";
+import { Tag } from "@lezer/highlight";
+
+/**
+ * Adapts a framework-independent MiniStreamParser to the StreamParser
+ * interface from @codemirror/language. The mini parser's state is boxed as
+ * `{ contents }` so that `token` and `blankLine` can replace it in place.
+ * `config` optionally supplies `name`, `languageData` and `tokenTable`,
+ * which are passed through to the returned parser unchanged.
+ */
+export function mkStreamParser<State, Tree>(
+  parser: MiniStreamParser<State, Tree>,
+  config: {
+    name?: string;
+    languageData?: any;
+    tokenTable?: { [token: string]: Tag };
+  } = {}
+): StreamParser<{ contents: State }> {
+  // Placeholder location: this adapter does not track source positions.
+  const matchedLocation = () => ({
+    start: { line: 1, column: 1 },
+    end: { line: 1, column: 2 },
+  });
+  // Stream shown to the parser for an empty line: nothing left to match.
+  const blankAdapter: StringStream = {
+    eat: () => null,
+    peek: () => null,
+    sol: () => true,
+    eol: () => true,
+    matchedLocation,
+  };
+  return {
+    name: config.name,
+    // Fresh box per call: `token` mutates it, so a shared box would leak state.
+    startState: () => ({ contents: parser.startState }),
+    token(stream, state) {
+      const adapter: StringStream = {
+        eat(pattern) {
+          const result = stream.match(pattern);
+          if (!result) return null;
+          // String patterns report `true`; regexps report the match array.
+          return result === true ? (pattern as string) : result[0];
+        },
+        peek(pattern) {
+          const rest = stream.string.slice(stream.pos);
+          if (typeof pattern !== "string") return rest.match(pattern)?.[0] ?? null;
+          return rest.startsWith(pattern) ? pattern : null;
+        },
+        sol: () => stream.sol(),
+        eol: () => stream.eol(),
+        matchedLocation,
+      };
+      const response = parser.advance(adapter, state.contents);
+      state.contents = response.state;
+      return response.tag || null;
+    },
+    blankLine(state) {
+      state.contents = parser.advance(blankAdapter, state.contents).state;
+    },
+    copyState: ({ contents }) => ({ contents }),
+    indent: () => null,
+    languageData: config.languageData ?? {},
+    tokenTable: config.tokenTable,
+  };
+}
@@ -0,0 +1,69 @@
+export interface SourcePosition {
+  line: number; // line number, always >= 1
+  column: number; // column number, always >= 1
+}
+
+export interface SourceLocation {
+  start: SourcePosition; // position where the span begins
+  end: SourcePosition; // position where the span ends — NOTE(review): inclusive or exclusive? confirm
+}
+
+export interface StringStream {
+  /** Matches a string or a regexp (which must start with ^ to only
+   * match the start of the string) and advances the current position
+   * if found. Returns a non-empty matched string, or null.
+   */
+  eat(match: string | RegExp): string | null;
+
+  /** Same as eat(), but doesn't advance the current position. */
+  peek(match: string | RegExp): string | null;
+
+  /** True if at the start of a line. */
+  sol(): boolean;
+
+  /** True if at the end of a line. */
+  eol(): boolean;
+
+  /** Returns the SourceLocation covered since this StringStream
+   * was initialized (which, in the stream parser, always happens
+   * immediately before advance() is called).
+   */
+  matchedLocation(): SourceLocation;
+}
+
+export type Tag = string; // name of a token tag, e.g. "comment" or "keyword"
+
+export interface Issue {
+  type: "Issue"; // literal discriminant
+  msg: string; // message text describing the issue
+  loc?: SourceLocation; // location the issue refers to, when one is given
+}
+
+export interface ParserResponse<State, Tree> {
+  state: State; // parser state to pass to the next advance() call
+  tag?: Tag; // tag for the text consumed by this call, if any
+  tree?: Tree; // presumably a parse result, when one is produced — TODO confirm
+  issues?: Issue[]; // issues reported by this call, if any
+}
+
+export interface StreamParser<State, Tree> {
+  startState: State; // state passed to the first advance() call
+
+  /** Called to advance the stream state and the parser state. It is
+   * okay to return a zero-length token, but only if the state
+   * changes (otherwise this is definitely an infinite loop).
+   *
+   * Will be called exactly once on an empty line. Except in that
+   * case, stream.eol() will never be true when this function is
+   * initially called: if you advance to the end of the line, the
+   * next call will be advance() on the subsequent line, or will be
+   * handleEof().
+   */
+  advance(stream: StringStream, state: State): ParserResponse<State, Tree>;
+
+  /** Once the end of the file is reached, this function is called
+   * repeatedly until it returns null in order for any cleanup
+   * needed to happen.
+   */
+  handleEof(state: State): null | ParserResponse<State, Tree>;
+}
@@ -0,0 +1,113 @@
+import type { ParserResponse, StreamParser } from "./tokenizer-types";
+
+const IDCHARS = /^[_!&$^+/<=>?@~|#*`;,\-\\a-zA-Z0-9'\u{80}-\u{10FFFF}]+/u; // one or more identifier characters, anchored at the current position
+
+type State =
+  | { type: "Toplevel" } // ordinary code, outside any comment
+  | { type: "Eolcomment" } // after a `%.` marker; the tokenizer never leaves this state
+  | { type: "Multilinecomment"; stack: number }; // inside `%{ ... }%`; stack is the nesting depth
+
+/**
+ * Line-oriented tokenizer for Twelf source, used for syntax highlighting.
+ *
+ * The state records whether the stream is in ordinary code ("Toplevel"),
+ * past a `%.` marker ("Eolcomment", which is never left again), or inside
+ * `%{ ... }%` comments ("Multilinecomment", with `stack` = nesting depth).
+ *
+ * Each `advance` call consumes at most one token from the stream and returns
+ * the next state plus an optional highlighting tag (none for whitespace).
+ * No tree or issues are produced; `handleEof` has no cleanup to do.
+ */
+export const twelfTokenizer: StreamParser<State, null> = {
+  startState: { type: "Toplevel" },
+
+  handleEof: () => null,
+
+  advance: (stream, state): ParserResponse<State, null> => {
+    if (stream.eol()) {
+      // Empty line: nothing to consume, whatever the state.
+      return { state };
+    }
+
+    if (state.type === "Eolcomment") {
+      return stream.eat(/^.+/) ? { state, tag: "comment" } : { state };
+    }
+
+    if (state.type === "Multilinecomment") {
+      if (stream.eat("}") && stream.eat("%")) {
+        // `}%` closes one nesting level; the outermost returns to code.
+        return {
+          state:
+            state.stack === 1
+              ? { type: "Toplevel" }
+              : { type: "Multilinecomment", stack: state.stack - 1 },
+          tag: "comment",
+        };
+      }
+      if (stream.eat("%") && stream.eat("{")) {
+        // A nested `%{` is still comment text, so it is tagged as such.
+        return { state: { ...state, stack: state.stack + 1 }, tag: "comment" };
+      }
+      // Any lone `}` or `%` eaten above is part of this comment chunk too.
+      stream.eat(/^[^}%]*/);
+      return { state, tag: "comment" };
+    }
+
+    if (stream.eat(/^\s+/)) {
+      return { state };
+    }
+
+    if (stream.eat(/^[:.()\[\]{}]/)) {
+      return { state, tag: "punctuation" };
+    }
+
+    if (stream.eat("%")) {
+      if (stream.eat(".")) {
+        return { state: { type: "Eolcomment" }, tag: "comment" };
+      }
+      if (stream.eat("{")) {
+        return { state: { type: "Multilinecomment", stack: 1 }, tag: "comment" };
+      }
+      // `%` followed by whitespace, another `%`, or end of line: line comment.
+      if (stream.eat(/^[\s%].*/) || stream.eol()) {
+        return { state, tag: "comment" };
+      }
+      // `%` followed by identifier characters is tagged as a keyword.
+      if (stream.eat(IDCHARS)) {
+        return { state, tag: "keyword" };
+      }
+      // Must be a regexp: the string "/^./" would only match that literal text.
+      stream.eat(/^./);
+      return { state, tag: "invalid" };
+    }
+
+    if (stream.eat('"')) {
+      // `*` rather than `+` so that the empty string `""` is a literal too.
+      if (stream.eat(/^[^"]*"/)) {
+        return { state, tag: "literal" };
+      }
+      // No closing quote on this line: flag the rest of the line.
+      stream.eat(/^.*/);
+      return { state, tag: "invalid" };
+    }
+
+    const tok = stream.eat(IDCHARS);
+    if (tok) {
+      switch (tok) {
+        case "<-":
+        case "->":
+        case "_":
+        case "=":
+          return { state, tag: "punctuation" };
+        case "type":
+          return { state, tag: "keyword" };
+        default:
+          // A leading uppercase letter or `_` gets the variable-name tag.
+          return { state, tag: /^[A-Z_]/.test(tok) ? "variableName" : "atom" };
+      }
+    }
+
+    stream.eat(/^./);
+    return { state, tag: "invalid" };
+  },
+};