From a19dfa1b25201a4b3acfcb7560641c84b5daa3b5 Mon Sep 17 00:00:00 2001 From: Michael Toy <66150587+mtoy-googly-moogly@users.noreply.github.com> Date: Fri, 22 Nov 2024 20:09:13 -0800 Subject: [PATCH] inch a little with snowflake --- .../src/snowflake_connection.ts | 165 ++++++---- .../malloy-db-trino/src/trino_connection.ts | 302 ++++++------------ packages/malloy/src/dialect/index.ts | 2 + .../malloy/src/dialect/snowflake/snowflake.ts | 12 +- packages/malloy/src/dialect/tiny_parser.ts | 136 ++++++++ packages/malloy/src/index.ts | 2 + 6 files changed, 345 insertions(+), 274 deletions(-) create mode 100644 packages/malloy/src/dialect/tiny_parser.ts diff --git a/packages/malloy-db-snowflake/src/snowflake_connection.ts b/packages/malloy-db-snowflake/src/snowflake_connection.ts index 5dde6b40e..f62880b9e 100644 --- a/packages/malloy-db-snowflake/src/snowflake_connection.ts +++ b/packages/malloy-db-snowflake/src/snowflake_connection.ts @@ -37,7 +37,7 @@ import { SnowflakeDialect, TestableConnection, arrayEachFields, - LeafAtomicTypeDef, + TinyParser, } from '@malloydata/malloy'; import {BaseConnection} from '@malloydata/malloy/connection'; @@ -63,19 +63,19 @@ export interface SnowflakeConnectionOptions { class StructMap { fieldMap = new Map(); - type = 'record'; - isArray = false; - constructor(type: string, isArray: boolean) { - this.type = type; - this.isArray = isArray; - } + constructor(public type: string) {} - addChild(name: string, type: string): StructMap { - const s = new StructMap(type, false); + setChild(name: string, type: string) { + // Really only need a fieldmap for array or object types, but whatever + const s = new StructMap(type); this.fieldMap.set(name, s); return s; } + + getChild(name: string) { + return this.fieldMap.get(name); + } } export class SnowflakeConnection @@ -179,21 +179,13 @@ export class SnowflakeConnection structDef: StructDef, structMap: StructMap ): void { - if (structMap.fieldMap.size === 0) return; for (const [field, value] of structMap.fieldMap) { const type = value.type; const name = field; - // check for an array - if (value.isArray && type !== 'object') { - // Apparently there can only be arrays of integers, strings, or unknowns? - // TODO is this true or is this just all that got implemented? - const malloyType: LeafAtomicTypeDef = - type === 'integer' - ? {type: 'number', numberType: 'integer'} - : type === 'varchar' - ? {type: 'string'} - : {type: 'sql native', rawType: type}; + const inArray = structMap.type === 'array'; + if (inArray && type !== 'object') { + const malloyType = this.dialect.sqlTypeToMalloyType(type); const innerStructDef: StructDef = { type: 'array', name, @@ -205,7 +197,7 @@ export class SnowflakeConnection structDef.fields.push(innerStructDef); } else if (type === 'object') { const structParts = {name, dialect: this.dialectName, fields: []}; - const innerStructDef: StructDef = value.isArray + const innerStructDef: StructDef = inArray ? 
{
              ...structParts,
              type: 'array',
@@ -218,7 +210,7 @@ export class SnowflakeConnection
               join: 'one',
             };
         this.addFieldsToStructDef(innerStructDef, value);
-        structDef.fields.push(innerStructDef);
+        structDef.fields.push({...innerStructDef, name});
       } else {
         const malloyType = this.dialect.sqlTypeToMalloyType(type);
         structDef.fields.push({...malloyType, name});
@@ -236,29 +228,20 @@ export class SnowflakeConnection
     const notVariant = new Map();
     for (const row of rows) {
       // data types look like `VARCHAR(1234)`
-      let snowflakeDataType = row['type'] as string;
-      snowflakeDataType = snowflakeDataType.toLocaleLowerCase().split('(')[0];
-      const s = structDef;
-      const malloyType = this.dialect.sqlTypeToMalloyType(snowflakeDataType);
+      const snowflakeDataType = (row['type'] as string)
+        .toLocaleLowerCase()
+        .split('(')[0];
       const name = row['name'] as string;
-      if (snowflakeDataType === 'variant' || snowflakeDataType === 'array') {
+      if (['variant', 'array', 'object'].includes(snowflakeDataType)) {
         variants.push(name);
-        continue;
-      }
-
-      notVariant.set(name, true);
-      if (malloyType) {
-        s.fields.push({...malloyType, name});
       } else {
-        s.fields.push({
-          type: 'sql native',
-          rawType: snowflakeDataType,
-          name,
-        });
+        notVariant.set(name, true);
+        const malloyType = this.dialect.sqlTypeToMalloyType(snowflakeDataType);
+        structDef.fields.push({...malloyType, name});
       }
     }
-    // if we have variants, sample the data
+    // For variant/array/object columns, we need to sample the data to know the schema
     if (variants.length > 0) {
       const sampleQuery = `
        SELECT regexp_replace(PATH, '\\\\[[0-9]*\\\\]', '') as PATH, lower(TYPEOF(value)) as type
@@ -271,32 +254,33 @@ export class SnowflakeConnection

     // take the schema in list form and convert it into a tree.
-    const structMap = new StructMap('object', true);
+    const structMap = new StructMap('object');
     for (const f of fieldPathRows) {
       const pathString = f['PATH']?.valueOf().toString();
       const fieldType = f['TYPE']?.valueOf().toString();
       if (pathString === undefined || fieldType === undefined) continue;
-      const path = pathString.split('.');
-      let parent = structMap;
-
+      const pathParser = new PathParser(pathString);
+      const zPath = pathParser.pathChain();
       // ignore the fields we've already added.
-      if (path.length === 1 && notVariant.get(pathString)) continue;
-
-      let index = 0;
-      for (const segment of path) {
-        let thisNode = parent.fieldMap.get(segment);
-        if (thisNode === undefined) {
-          thisNode = parent.addChild(segment, fieldType);
-        }
-        if (fieldType === 'array') {
-          thisNode.isArray = true;
-          // if this is the last
-        } else if (index === path.length - 1) {
-          thisNode.type = fieldType;
+      if (zPath.next === undefined && notVariant.get(zPath.name)) continue;
+
+      for (
+        let segment: PathChain | undefined = zPath, parent = structMap;
+        segment;
+        segment = segment.next
+      ) {
+        if (segment.next === undefined) {
+          // if this is the last element in the path, that is where the type goes
+          parent.setChild(segment.name, fieldType);
+        } else {
+          // otherwise walk down the tree to the node that owns this segment
+          const nxtP = parent.getChild(segment.name);
+          if (!nxtP) {
+            throw new Error(`Could not find parent node for path '${pathString}'`);
+          }
+          parent = nxtP;
+        }
-        parent = thisNode;
-        index += 1;
       }
     }
     this.addFieldsToStructDef(structDef, structMap);
@@ -338,3 +322,68 @@ export class SnowflakeConnection
     return tableName;
   }
 }
+
+/**
+ * Instead of an array of names, we return a path as a linked list.
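+ *
+ * For example (an illustrative path, not one captured from a real table), a
+ * PATH string such as FOO['odd name'].bar would be returned by
+ * PathParser.pathChain() as
+ *
+ *   {name: 'FOO', next: {name: 'odd name', next: {name: 'bar'}}}
+ *
+ * Numeric subscripts are also accepted (FOO[0] would yield
+ * {name: 'FOO', next: {name: '0'}}), though the sampling query above already
+ * strips them from PATH before parsing.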
+ */ +interface PathChain { + name: string; + next?: PathChain; +} + +export class PathParser extends TinyParser { + constructor(pathName: string) { + super(pathName, { + quoted: /^'(\\'|[^'])*'/, + char: /^[[.\]]/, + number: /^\d+/, + word: /^\w+/, + }); + } + + getName() { + const nameStart = this.next(); + if (nameStart.type === 'word') { + return nameStart.text; + } + if (nameStart.type === '[') { + const quotedName = this.next('quoted'); + this.next(']'); + return quotedName.text; + } + throw this.parseError('Expected column name'); + } + + getSubscript(node: PathChain): PathChain { + const index = this.next(); + if (index.type === 'number') { + node.next = {name: index.text}; + return node.next; + } else if (index.type === 'quoted') { + node.next = {name: index.text}; + return node.next; + } else { + throw this.parseError(`Unexpected ${index.type}`); + } + } + + pathChain(): PathChain { + const chain: PathChain = {name: this.getName()}; + let node = chain; + for (;;) { + const sep = this.next(); + if (sep.type === 'eof') { + return chain; + } + if (sep.type === '.') { + node.next = {name: this.next('word').text}; + node = node.next; + } else if (sep.type === '[') { + node = this.getSubscript(node); + this.next(']'); + } else { + throw this.parseError(`Unexpected ${sep.type}`); + } + } + } +} diff --git a/packages/malloy-db-trino/src/trino_connection.ts b/packages/malloy-db-trino/src/trino_connection.ts index 68da4e1dc..77e9c49ab 100644 --- a/packages/malloy-db-trino/src/trino_connection.ts +++ b/packages/malloy-db-trino/src/trino_connection.ts @@ -45,6 +45,7 @@ import { Dialect, ArrayTypeDef, FieldDef, + TinyParser, } from '@malloydata/malloy'; import {BaseConnection} from '@malloydata/malloy/connection'; @@ -580,68 +581,8 @@ export class PrestoConnection extends TrinoPrestoConnection { ); } - /* - * Here's a hand built parser for schema lines, roughly this grammar - * SCHEMA_LINE: PrestoExplainParser => [TYPE_LIST] - * NAME_LIST: NAME (, NAME)* - * TYPE_LIST: TYPE_SPEC (, TYPE_SPEC)* - * TYPE_SPEC: exprN ':' TYPE - * TYPE: REC_TYPE | ARRAY_TYPE | SQL_TYPE - * ARRAY_TYPE: ARRAY '(' TYPE ')' - * REC_TYPE: REC '(' "name" TYPE (, "name" TYPE)* ')' - */ const schemaDesc = new PrestoExplainParser(lines[0], this.dialect); - if (schemaDesc.containsNo(']') || schemaDesc.missingExpected('[')) { - throw schemaDesc.parseError( - "Expected something like '- Output [PlanName N] [NAME_LIST]'" - ); - } - const fieldNames: string[] = []; - for (;;) { - const nmToken = schemaDesc.next(); - if (nmToken.type !== 'id') { - throw schemaDesc.parseError('Expected name of field'); - } - fieldNames.push(nmToken.text); - const sep = schemaDesc.next(); - if (sep.type === ',') { - continue; - } - if (sep.type !== ']') { - throw schemaDesc.parseError( - `Unexpected '${sep.text}' while getting name list` - ); - } - break; - } - if (schemaDesc.missingExpected('arrow', '[')) { - throw schemaDesc.parseError("Expected '=> [' to begin type definition"); - } - for (let nameIndex = 0; ; nameIndex += 1) { - const name = fieldNames[nameIndex]; - if (schemaDesc.missingExpected('id', ':')) { - throw schemaDesc.parseError( - "Expected 'exprN:' before each type in schema" - ); - } - const nextType = schemaDesc.typeDef(); - structDef.fields.push({...nextType, name}); - const sep = schemaDesc.next(); - if (sep.text === ',') { - continue; - } - if (sep.text !== ']') { - throw schemaDesc.parseError( - `Unexpected '${sep.text}' between field types` - ); - } - break; - } - if (structDef.fields.length !== fieldNames.length) { - 
throw new Error( - `presto schema error mismatched ${structDef.fields.length} types and ${fieldNames.length} fields` - ); - } + structDef.fields = schemaDesc.parseExplain(); } unpackArray(data: unknown): unknown[] { @@ -681,77 +622,76 @@ export class TrinoConnection extends TrinoPrestoConnection { } } -interface Token { - type: string; - text: string; -} - -class PrestoExplainParser { - tokens: Generator; - parseCursor = 0; - peeked?: Token; +/** + * A hand built parser for schema lines, roughly this grammar + * SCHEMA_LINE: - Output [PlanName N] [NAME_LIST] => [TYPE_LIST] + * NAME_LIST: NAME (, NAME)* + * TYPE_LIST: TYPE_SPEC (, TYPE_SPEC)* + * TYPE_SPEC: exprN ':' TYPE + * TYPE: REC_TYPE | ARRAY_TYPE | SQL_TYPE + * ARRAY_TYPE: ARRAY '(' TYPE ')' + * REC_TYPE: REC '(' "name" TYPE (, "name" TYPE)* ')' + */ +class PrestoExplainParser extends TinyParser { constructor( readonly input: string, readonly dialect: Dialect ) { - this.tokens = this.tokenize(input); - } - - parseError(str: string) { - const errText = - `INTERAL ERROR parsing presto schema: ${str}\n` + - `${this.input}\n` + - `${' '.repeat(this.parseCursor)}^`; - return new Error(errText); - } - - peek(): Token { - if (this.peeked) { - return this.peeked; - } else { - const {value} = this.tokens.next(); - const peekVal = value ?? {type: 'eof', text: ''}; - this.peeked = peekVal; - return peekVal; - } - } - - next(): Token { - let next = this.peeked; - if (next) { - this.peeked = undefined; - return next; - } else { - next = this.peek(); - this.peeked = undefined; - return next; - } + super(input, { + space: /^\s+/, + arrow: /^=>/, + char: /^[,:[\]()-]/, + id: /^\w+/, + // mtoy todo check what happens if a name has quotes in it + quoted_name: /^"\w+"/, + }); } - hasExpected(...type: string[]) { - for (const t of type) { - const next = this.next(); - if (next.type !== t) { - return false; + fieldNameList(): string[] { + this.skipTo(']'); // Skip to end of plan + this.next('['); // Expect start of name list + const fieldNames: string[] = []; + for (;;) { + const nmToken = this.next('id'); + fieldNames.push(nmToken.text); + const sep = this.next(); + if (sep.type === ',') { + continue; + } + if (sep.type !== ']') { + throw this.parseError( + `Unexpected '${sep.text}' while getting field name list` + ); } + break; } - return true; + return fieldNames; } - containsNo(type: string) { - for (;;) { - const next = this.next(); - if (next.type === 'eof') { - return true; + parseExplain(): FieldDef[] { + const fieldNames = this.fieldNameList(); + const fields: FieldDef[] = []; + this.next('arrow', '['); + for (let nameIndex = 0; ; nameIndex += 1) { + const name = fieldNames[nameIndex]; + this.next('id', ':'); + const nextType = this.typeDef(); + fields.push({...nextType, name}); + const sep = this.next(); + if (sep.text === ',') { + continue; } - if (next.type === type) { - return false; + if (sep.text !== ']') { + throw this.parseError(`Unexpected '${sep.text}' between field types`); } + break; } - } - - missingExpected(...type: string[]) { - return !this.hasExpected(...type); + if (fields.length !== fieldNames.length) { + throw new Error( + `Presto schema error mismatched ${fields.length} types and ${fieldNames.length} fields` + ); + } + return fields; } typeDef(): AtomicTypeDef { @@ -760,58 +700,42 @@ class PrestoExplainParser { throw this.parseError( 'Unexpected EOF parsing type, expected a type name' ); - } else if (typToken.text === 'row') { - if (this.hasExpected('(')) { - const fields: FieldDef[] = []; - for (;;) { - const name = 
this.next(); - if (name.type !== 'name') { - throw this.parseError('Expected quoted "name" for record property'); - } - const getDef = this.typeDef(); - fields.push({...getDef, name: name.text}); - const sep = this.next(); - if (sep.text === ')') { - break; - } - if (sep.text === ',') { - continue; - } - throw this.parseError( - `Unexpected '${sep.text}' while parsing record type` - ); + } else if (typToken.text === 'row' && this.next('(')) { + const fields: FieldDef[] = []; + for (;;) { + const name = this.next('quoted_name'); + const getDef = this.typeDef(); + fields.push({...getDef, name: name.text}); + const sep = this.next(); + if (sep.text === ')') { + break; } - const def: RecordTypeDef = { - type: 'record', - name: '', - join: 'one', - dialect: this.dialect.name, - fields, - }; - return def; - } else { - throw new Error('Expected rec followed by('); - } - } else if (typToken.text === 'array') { - if (this.hasExpected('(')) { - const elType = this.typeDef(); - if (this.missingExpected(')')) { - throw this.parseError("Expected ')' at end of array type"); + if (sep.text === ',') { + continue; } - const def: ArrayTypeDef = { - type: 'array', - name: '', - dialect: this.dialect.name, - join: 'many', - elementTypeDef: - elType.type === 'record' ? {type: 'record_element'} : elType, - fields: - elType.type === 'record' ? elType.fields : arrayEachFields(elType), - }; - return def; - } else { - throw this.parseError('Expected array followed by ('); } + const def: RecordTypeDef = { + type: 'record', + name: '', + join: 'one', + dialect: this.dialect.name, + fields, + }; + return def; + } else if (typToken.text === 'array' && this.next('(')) { + const elType = this.typeDef(); + this.next(')'); + const def: ArrayTypeDef = { + type: 'array', + name: '', + dialect: this.dialect.name, + join: 'many', + elementTypeDef: + elType.type === 'record' ? {type: 'record_element'} : elType, + fields: + elType.type === 'record' ? elType.fields : arrayEachFields(elType), + }; + return def; } else if (typToken.type === 'id') { const sqlType = typToken.text; const def = this.dialect.sqlTypeToMalloyType(sqlType); @@ -820,9 +744,7 @@ class PrestoExplainParser { } if (sqlType === 'varchar') { if (this.peek().type === '(') { - if (this.missingExpected('(', 'id', ')')) { - throw this.parseError('Error parsing varchar()'); - } + this.next('(', 'id', ')'); } } return def; @@ -831,42 +753,4 @@ class PrestoExplainParser { `'${typToken.text}' unexpected while looking for a type` ); } - - private *tokenize(src: string): Generator { - const tokenRegex = { - space: /^\s+/, - arrow: /^=>/, - char: /^[,:[\]()-]/, - id: /^\w+/, - name: /^"\w+"/, - }; - for (;;) { - let notFound = true; - for (const tokenType in tokenRegex) { - const foundToken = src.match(tokenRegex[tokenType]); - if (foundToken) { - let tokenText = foundToken[0]; - src = src.slice(tokenText.length); - this.parseCursor = this.input.length - src.length; - if (tokenType !== 'space') { - if (tokenType === 'name') { - tokenText = tokenText.slice(1, -1); // strip quotes - } - yield { - type: tokenType === 'char' ? 
tokenText : tokenType, - text: tokenText, - }; - notFound = false; - } - } - } - if (notFound) { - yield {type: 'unexpected token', text: src}; - return; - } - if (src === '') { - return; - } - } - } } diff --git a/packages/malloy/src/dialect/index.ts b/packages/malloy/src/dialect/index.ts index 140716594..15c70a24f 100644 --- a/packages/malloy/src/dialect/index.ts +++ b/packages/malloy/src/dialect/index.ts @@ -52,3 +52,5 @@ export {MySQLDialect} from './mysql'; export {getDialect, registerDialect} from './dialect_map'; export {getMalloyStandardFunctions} from './functions'; export type {MalloyStandardFunctionImplementations} from './functions'; +export type {TinyToken} from './tiny_parser'; +export {TinyParser} from './tiny_parser'; diff --git a/packages/malloy/src/dialect/snowflake/snowflake.ts b/packages/malloy/src/dialect/snowflake/snowflake.ts index f44909f17..92ead8cfa 100644 --- a/packages/malloy/src/dialect/snowflake/snowflake.ts +++ b/packages/malloy/src/dialect/snowflake/snowflake.ts @@ -521,14 +521,12 @@ ${indent(sql)} sqlLiteralRecord(lit: RecordLiteralNode): string { const rowVals: string[] = []; for (const f of lit.typeDef.fields) { - if (isAtomic(f)) { - const name = f.as ?? f.name; - const propName = `'${name}'`; - const propVal = lit.kids[name].sql ?? 'internal-error-record-literal'; - rowVals.push(`${propName}:${propVal}`); - } + const name = f.as ?? f.name; + const propName = `'${name}'`; + const propVal = lit.kids[name].sql ?? 'internal-error-record-literal'; + rowVals.push(`${propName}:${propVal}`); } - return `{${rowVals.join(',')}}::${this.malloyTypeToSQLType(lit.typeDef)}`; + return `{${rowVals.join(',')}}`; } sqlLiteralArray(lit: ArrayLiteralNode): string { diff --git a/packages/malloy/src/dialect/tiny_parser.ts b/packages/malloy/src/dialect/tiny_parser.ts new file mode 100644 index 000000000..01765a9a7 --- /dev/null +++ b/packages/malloy/src/dialect/tiny_parser.ts @@ -0,0 +1,136 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +export interface TinyToken { + type: string; + text: string; +} + +/** + * Simple framework for writing schema parsers. The parsers using this felt + * better than the more ad-hoc code they replaced, and are smaller than + * using a parser generator. + * + * NOTE: All parse errors are exceptions. + */ +export class TinyParser { + private tokens: Generator; + private parseCursor = 0; + private lookAhead?: TinyToken; + private tokenMap: Record; + + /** + * The token map is tested in order. Return TinyToken + * is {type: tokenMapKey, text: matchingText }, except + * for the special tokenMapKeys: + * * space: skipped and never returned + * * char: matched string return in both .type and .text + * * q*: any token name starting with 'q' is assumed to be + * a quoted string and the text will have the first and + * last characters stripped + */ + constructor( + readonly input: string, + tokenMap?: Record + ) { + this.tokens = this.tokenize(input); + this.tokenMap = tokenMap ?? { + space: /^\s+/, + char: /^[,:[\]()-]/, + id: /^\w+/, + qstr: /^"\w+"/, + }; + } + + parseError(str: string) { + const errText = + `INTERNAL ERROR parsing schema: ${str}\n` + + `${this.input}\n` + + `${' '.repeat(this.parseCursor)}^`; + return new Error(errText); + } + + peek(): TinyToken { + if (this.lookAhead) { + return this.lookAhead; + } else { + const {value} = this.tokens.next(); + const peekVal = value ?? 
{type: 'eof', text: ''};
+      this.lookAhead = peekVal;
+      return peekVal;
+    }
+  }
+
+  private getNext(): TinyToken {
+    const next = this.lookAhead ?? this.peek();
+    this.lookAhead = undefined;
+    return next;
+  }
+
+  /**
+   * Returns the next token. If any token types are passed, reads and
+   * requires each of those tokens in order, then returns the last one.
+   * @param types list of required token types
+   * @returns The last token read
+   */
+  next(...types: string[]): TinyToken {
+    if (types.length === 0) return this.getNext();
+    let next: TinyToken | undefined = undefined;
+    let expected = types[0];
+    for (const typ of types) {
+      next = this.getNext();
+      expected = typ;
+      if (next.type !== typ) {
+        next = undefined;
+        break;
+      }
+    }
+    if (next) return next;
+    throw this.parseError(`Expected ${expected}`);
+  }
+
+  skipTo(type: string) {
+    for (;;) {
+      const next = this.next();
+      if (next.type === 'eof') {
+        throw this.parseError(`Expected token '${type}'`);
+      }
+      if (next.type === type) {
+        return;
+      }
+    }
+  }
+
+  private *tokenize(src: string): Generator<TinyToken> {
+    const tokenList = this.tokenMap;
+    while (this.parseCursor < src.length) {
+      let notFound = true;
+      for (const tokenType in tokenList) {
+        const srcAtCursor = src.slice(this.parseCursor);
+        const foundToken = srcAtCursor.match(tokenList[tokenType]);
+        if (foundToken) {
+          notFound = false;
+          let tokenText = foundToken[0];
+          this.parseCursor += tokenText.length;
+          if (tokenType !== 'space') {
+            if (tokenType[0] === 'q') {
+              tokenText = tokenText.slice(1, -1); // strip quotes
+            }
+            yield {
+              type: tokenType === 'char' ? tokenText : tokenType,
+              text: tokenText,
+            };
+          }
+        }
+      }
+      if (notFound) {
+        yield {type: 'unexpected token', text: src.slice(this.parseCursor)};
+        return;
+      }
+    }
+  }
+}
diff --git a/packages/malloy/src/index.ts b/packages/malloy/src/index.ts
index c123c28c6..10715428c 100644
--- a/packages/malloy/src/index.ts
+++ b/packages/malloy/src/index.ts
@@ -42,6 +42,7 @@ export {
   literal,
   spread,
   Dialect,
+  TinyParser,
 } from './dialect';
 export type {
   DialectFieldList,
@@ -51,6 +52,7 @@ export type {
   DefinitionBlueprint,
   DefinitionBlueprintMap,
   OverloadedDefinitionBlueprint,
+  TinyToken,
 } from './dialect';
 // TODO tighten up exports
 export type {