From bb5c363e3273b85f678b535439d3d32346518c82 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Wed, 13 Mar 2019 21:00:29 -0700 Subject: [PATCH 01/12] wip --- interfaces/interpolate.d.ts | 15 +- interfaces/moss.d.ts | 2 + interfaces/schema.d.ts | 18 + src/async.ts | 34 +- src/interpolate/async.ts | 241 ++++++----- src/interpolate/index.ts | 6 +- src/interpolate/shared.ts | 17 +- src/interpolate/sync.ts | 243 ++++++----- src/{interpolate => parsers}/expression.pegjs | 0 src/parsers/moss.ne | 403 ++++++++++++++++++ src/schema.ts | 59 +++ src/sync.ts | 61 ++- 12 files changed, 828 insertions(+), 271 deletions(-) create mode 100644 interfaces/schema.d.ts rename src/{interpolate => parsers}/expression.pegjs (100%) create mode 100644 src/parsers/moss.ne create mode 100644 src/schema.ts diff --git a/interfaces/interpolate.d.ts b/interfaces/interpolate.d.ts index 22ae5fb..771a399 100644 --- a/interfaces/interpolate.d.ts +++ b/interfaces/interpolate.d.ts @@ -2,27 +2,26 @@ declare namespace Expand { type Key = '$' | '=' | '^'; type Open = '{' | '('; type Terminal = '}' | ')' | ' ' | '__null__'; - type Op = 'v' | 's' | 'e' | 'f'; + type Op = 'v' | 's' | 'e' | 'f' | 'n'; interface State { detecting?: Key header?: Key op?: Op - terminal?: Terminal; + terminal?: Terminal dirty?: boolean escape?: boolean - sourceMap: number[] + escaped?: string + sourceMap?: number[] } - interface Elem { - state: State - raw: any[] + interface Elem extends State { + out: any[] source: any[] - subst: any[] } export interface Options { - dereferenceSync?: (sub: string, sourceMap?: number[]) => any + dereferenceSync?: (sub: string, sourceMap?: number[]) => any dereference?: (sub: string, sourceMap?: number[]) => any call?: (sub: any, sourceMap?: number[]) => any fetch?: (sub: any, sourceMap?: number[]) => any diff --git a/interfaces/moss.d.ts b/interfaces/moss.d.ts index fe228a8..04e8dd8 100644 --- a/interfaces/moss.d.ts +++ b/interfaces/moss.d.ts @@ -1,4 +1,5 @@ /// +/// interface MossError { name: 'MossError', @@ -20,6 +21,7 @@ declare namespace Moss { auto?: any stack?: any selectors?: any + schema?: any merge?: { operator: Merge.Operator, precedence: { [x: string]: number } diff --git a/interfaces/schema.d.ts b/interfaces/schema.d.ts new file mode 100644 index 0000000..d8abbfb --- /dev/null +++ b/interfaces/schema.d.ts @@ -0,0 +1,18 @@ +declare namespace Moss { + namespace Schema { + // interface Options { + // scalarType?: string + // singleType?: Schema.Options + // multiType?: { [x: string]: Schema.Options } | Schema.Options[] + // isArray?: boolean + // isMap?: boolean + // } + interface Options { + type: string, + properties?: {[x: string]: Options}, + items?: Options[], + $id?: string + } + type Description = Options | [Options] | string + } +} \ No newline at end of file diff --git a/src/async.ts b/src/async.ts index 0dfe54f..a69f0a9 100644 --- a/src/async.ts +++ b/src/async.ts @@ -1,7 +1,7 @@ /// -import { merge, mergeArray, mergeObject, amap as map, aokmap as okmap, arrayify, extend, check, clone, each, union, difference, sum, valueForKeyPath, all, isEqual, unflatten, flatObject, unsetKeyPath, setValueForKeyPath, mergeOrReturnAssignment } from 'typed-json-transform'; -import { interpolateAsync as __interpolate } from './interpolate'; +import { merge, mergeArray, mergeObject, okmap as okmapSync, amap as map, aokmap as okmap, arrayify, extend, check, clone, each, union, difference, sum, valueForKeyPath, all, isEqual, unflatten, flatObject, unsetKeyPath, setValueForKeyPath, mergeOrReturnAssignment, contains } 
from 'typed-json-transform'; +import { interpolateAsync as __interpolate, reservedKeys } from './interpolate'; import { cascadeAsync as _cascade, shouldConstruct, select, parseSelectors } from './cascade'; import * as yaml from 'js-yaml'; @@ -17,6 +17,7 @@ import { import { handleError } from './util'; import { Sync } from './sync'; +import { parseDescription } from './schema'; export namespace Async { type Functions = Moss.Async.Functions; @@ -35,7 +36,9 @@ export namespace Async { const target = state.target || current.data; let res; - for (const _key of Object.keys(source)) { + const keys = Object.keys(source); + console.log('evaluate keys', keys); + for (const _key of keys) { if (!_key) { continue; } @@ -70,14 +73,11 @@ export namespace Async { } else { let val = source[_key]; if (_key[0] === '$') { - key = (await interpolate(current, _key)).data; - } else if (_key[0] == '\\') { - key = key.slice(1); - } else if (_key.indexOf('.') != -1) { - const [first, ...kp] = _key.split('.') - key = first; - val = {}; - setValueForKeyPath(source[_key], kp.join('.'), val); + if (!contains(reservedKeys, _key)) { + key = (await interpolate(current, _key)).data; + } else { + key = _key; + } } else { key = _key; } @@ -87,10 +87,14 @@ export namespace Async { if (key) { state.auto[key] = res; state.autoMap[key] = currentErrorPath(state).path.join('.'); + target[key] = res; } currentErrorPath(state).path.pop(); } + if (current.state.schema) { + // check + } return current; } @@ -209,6 +213,14 @@ export namespace Async { $: async (current: Moss.ReturnValue, args: any) => { await parseNextStructure(current, args); }, + schema: async (current: Moss.ReturnValue, args: any) => { + const description = await continueWithNewFrame(current, args); + current.state.schema = parseDescription(description.data); + current.data = current.state.schema; + }, + validate: async (current: Moss.ReturnValue, args: any) => { + const schema = await continueWithNewFrame(current, args); + }, extend: async (parent: Moss.ReturnValue, args: any) => { const layer = await continueWithNewFrame(parent, args); const { data } = layer; diff --git a/src/interpolate/async.ts b/src/interpolate/async.ts index 97664d6..cbc2af3 100644 --- a/src/interpolate/async.ts +++ b/src/interpolate/async.ts @@ -1,7 +1,7 @@ import { check } from 'typed-json-transform'; const expression = require('../../compiled/expression'); -import { newState, parse, reduce, append as _append, pop } from './shared'; +import { newState, parse, reduce, append as _append } from './shared'; export async function tokenize(str: string, options: Expand.Options) { const { dereference, dereferenceSync, call, shell, fetch } = options; @@ -11,45 +11,42 @@ export async function tokenize(str: string, options: Expand.Options) { let x = 0; let y = 0; - const stack: Expand.Elem[][] = [[{ state: { sourceMap: [0, str.length] }, raw: [], subst: [], source: [] }]]; - let ptr = stack[x][y]; + const stack: Expand.Elem[] = [newState()]; + let frame = stack[y]; - - const append = (char: string) => { - let nextChunk = false; - if (ptr.state.op) { - nextChunk = _append(ptr.subst, char) - } else { - nextChunk = _append(ptr.raw, char) + const append = (val: any) => { + if (frame.escape) { + frame.escaped += val; + return; } + let nextChunk = false; + nextChunk = _append(frame.out, val) if (nextChunk) { - ptr.source.push(char); - } else { - ptr.source[(ptr.source.length - 1) || 0] += char; - } - } - - const stepBack = () => { - if (ptr.state.op) { - pop(ptr.subst); + frame.source.push(val); } else { - 
pop(ptr.raw); + frame.source[(frame.source.length - 1) || 0] += val; } } - const open = (op: Expand.Op, terminal: Expand.Terminal) => { - stepBack(); - ptr.state.header = null; - ptr.state.detecting = null; - const existing = ptr.state.op; - if (existing) { - y++; - stack[x][y] = newState(); - ptr = stack[x][y]; - ptr.raw = []; + const open = (char: string, op: Expand.Op, terminal: Expand.Terminal) => { + const { escape, escaped } = frame; + if (escape) { + frame.escape = null; + frame.escaped = ''; + const directive = escaped.slice(1); + if (!directive){ + throw new Error('explicit interpolate without 2 char prefix directive'); + } + append(directive); + if (char) { + append(char); + } } - ptr.state.op = op; - ptr.state.terminal = terminal; + frame.header = null; + frame.detecting = null; + y++; + stack[y] = newState({ op: escape ? 'n' : op, terminal }); + frame = stack[y]; } const sub = async (fn: (s: string, location: any) => any, str: string, sourceMap?: number[]) => { @@ -85,106 +82,130 @@ export async function tokenize(str: string, options: Expand.Options) { } const close = async () => { - const op = ptr.state.op; - ptr.state.sourceMap = [offset, i + (ptr.state.terminal && ptr.state.terminal.length) - offset]; - ptr.state.op = null; - ptr.state.terminal = null; + const { op, terminal, escape, escaped } = frame; + frame.sourceMap = [offset, i + (terminal && terminal.length) - offset]; + frame.op = null; + frame.terminal = null; + if (escape && escaped) { + frame.escape = false; + frame.escaped = ''; + append(escaped); + } + const swap = reduce(frame.out, frame.source); + let out: any; let res; - const swap = reduce(ptr.subst, ptr.source); if (check(swap, [Object, Array])) { - res = await call(swap); + out = await call(swap); } else { if (op == 'v') { - res = await sub(dereference, swap, ptr.state.sourceMap); + out = await sub(dereference, swap, frame.sourceMap); } else if (op == 's') { - res = await sub(shell, swap, ptr.state.sourceMap); + out = await sub(shell, swap, frame.sourceMap); } else if (op == 'f') { - res = await sub(fetch, swap, ptr.state.sourceMap); + out = await sub(fetch, swap, frame.sourceMap); } else if (op == 'e') { - const deref = (str: string) => subSync(dereferenceSync, str, ptr.state.sourceMap) - res = await sub((s) => expression(deref, check).parse(s), swap, ptr.state.sourceMap) + const deref = (str: string) => subSync(dereferenceSync, str, frame.sourceMap) + out = await sub((s) => expression(deref, check).parse(s), swap, frame.sourceMap) + } else if (op == 'n') { + if (terminal != '__null__') append(terminal); + out = reduce(frame.out, frame.source); } } - if (y > 0) { - delete stack[x][y]; - y--; - ptr = stack[x][y]; - ptr.subst.push(res); - } - else { - if (res) { ptr.state.dirty = true }; - ptr.raw.push(res); - x++; - y = 0; - stack[x] = [newState()]; - ptr = stack[x][y]; - } + // if (out) frame.dirty = true; + // const { out } = frame; + // const out = reduce(frame.out, frame.source); + // delete stack[y]; + stack.length-- + y--; + frame = stack[y]; + append(out); + // frame.source.push(out); } for (i = 0; i != template.length; i++) { const char = template[i]; - if (ptr.state.escape) { - ptr.state.escape = false; - append(char); - } else { - const { detecting, header, op, terminal } = ptr.state; - switch (char) { - case '(': - if (detecting && (detecting == '$')) { - open('s', ')'); - break; - } else { - append(char); - } - break; - case '{': - if (detecting) { - open(detecting == '$' ? 'v' : detecting == '^' ? 
'f' : 'e', '}'); - break; - } else { - append(char); - } - break; - case '}': case ')': - if (op && terminal == char) { - await close(); - } else { - append(char); - } + const { detecting, header, op, terminal, escape, escaped } = frame; + switch (char) { + case '(': + if (detecting && (detecting == '$')) { + open(char, 's', ')'); break; - case ' ': - if (op && terminal == char) { - await close(); - } + } else { append(char); + } + break; + case '{': + if (detecting) { + open(char, detecting == '$' ? 'v' : detecting == '^' ? 'f' : 'e', '}'); break; - case '\\': - ptr.state.escape = true; - break; - default: - if (header) { - ptr.state.header = null; - if (header == '=') open('e', '__null__'); - else if (header == '^') open('f', '__null__'); - else if (header == '$') open('v', ' '); - } else if (char == '=' || char == '$' || char == '^') { - if (i < 1) ptr.state.header = char; - ptr.state.detecting = char; - } + } else { + append(char); + } + break; + case '}': case ')': + if (op && terminal == char) { + await close(); + } else { + append(char); + } + break; + case ' ': + if (op && terminal == char) { + await close(); + } + append(char); + break; + case '\\': + if (frame.escape) { + append(char); + } else { + frame.escape = true; + frame.escaped = '\\'; + } + break; + case '=': case '$': case '^': + if (detecting) { + append(detecting); + } + if (!frame.out[0] || (frame.out[0].length < 2)) { + frame.header = char; + } + if (frame.escape) { + append(char); + } + frame.detecting = char; + break; + default: + if (header) { + frame.header = null; + if (header == '=') open(null, 'e', '__null__'); + else if (header == '^') open(null, 'f', '__null__'); + else if (header == '$') open(null, 'v', '__null__'); + } else { if (detecting) { - ptr.state.detecting = null; + frame.detecting = null; + append(detecting); } - append(char); - break; - } + if (escape) { + frame.escape = false; + frame.escaped = null; + // console.log('append escaped', escaped) + append(escaped); + } + } + append(char); + break; } } - while (ptr.state.op) { - if (ptr.state.terminal == '}') throw { message: `expected a closing ${ptr.state.terminal}` } + if (frame.detecting) { + append(frame.detecting); + } + while (frame.op) { + if (frame.terminal == '}') throw { message: `expected a closing ${frame.terminal}` } await close(); } - if (ptr.state.detecting) { - ptr.state.detecting = null; + if (frame.detecting) { + frame.detecting = null; } return stack; }; diff --git a/src/interpolate/index.ts b/src/interpolate/index.ts index b1be6d6..b7697f0 100644 --- a/src/interpolate/index.ts +++ b/src/interpolate/index.ts @@ -1,4 +1,8 @@ import { interpolate as interpolateAsync } from './async' import { interpolate } from './sync'; -export { interpolate, interpolateAsync }; \ No newline at end of file +export { interpolate, interpolateAsync }; + +const jsonSchemaKeys = ['id', 'schema', 'ref', 'comment']; +const mongoKeys = ['set', 'unset', 'push', 'pull', 'gt', 'lt', 'gte', 'lte', 'exists']; +export const reservedKeys = jsonSchemaKeys.concat(mongoKeys); diff --git a/src/interpolate/shared.ts b/src/interpolate/shared.ts index 399b545..ed54194 100644 --- a/src/interpolate/shared.ts +++ b/src/interpolate/shared.ts @@ -110,9 +110,11 @@ export const pop = (stack: any[]) => { const lastIndex = (stack.length - 1) || 0; if (check(stack[lastIndex], String)) { const s = stack[lastIndex]; + const l = s.slice(-1); stack[lastIndex] = s.slice(0, s.length - 1); + return l; } else { - stack.pop(); + return stack.pop(); } } @@ -127,19 +129,14 @@ export const 
reduce = (raw: any[], source: any[]) => { return res; } -export function newState(): Expand.Elem { - return { state: { sourceMap: [] }, raw: [], subst: [], source: [] }; +export function newState(options?: Partial): Expand.Elem { + return { sourceMap: [], out: [], source: [], ...options }; } -export function parse(tokens: Expand.Elem[][]) { +export function parse(tokens: Expand.Elem[]) { let out = ''; let outSource = ''; let changed = false; - for (const e of tokens) { - const flat = reduce(e[0].raw, e[0].source); - out = join(out, flat, outSource, e[0].source[0]); - outSource = e[0].source.join(''); - if (e[0].state.dirty) changed = true; - } + out = reduce(tokens[0].out, tokens[0].source); return { value: out, changed: changed }; } \ No newline at end of file diff --git a/src/interpolate/sync.ts b/src/interpolate/sync.ts index 5c6aed1..47420a2 100644 --- a/src/interpolate/sync.ts +++ b/src/interpolate/sync.ts @@ -1,55 +1,52 @@ import { check } from 'typed-json-transform'; const expression = require('../../compiled/expression'); -import { newState, parse, reduce, append as _append, pop } from './shared'; +import { newState, parse, reduce, append as _append } from './shared'; export function tokenize(str: string, options: Expand.Options) { - const { dereference, call, shell, fetch } = options; + const { dereference, dereferenceSync, call, shell, fetch } = options; const template = String(str); let i = 0; let offset = i; let x = 0; let y = 0; - const stack: Expand.Elem[][] = [[{ state: { sourceMap: [0, str.length] }, raw: [], subst: [], source: [] }]]; - let ptr = stack[x][y]; + const stack: Expand.Elem[] = [newState()]; + let frame = stack[y]; - - const append = (char: string) => { - let nextChunk = false; - if (ptr.state.op) { - nextChunk = _append(ptr.subst, char) - } else { - nextChunk = _append(ptr.raw, char) + const append = (val: any) => { + if (frame.escape) { + frame.escaped += val; + return; } + let nextChunk = false; + nextChunk = _append(frame.out, val) if (nextChunk) { - ptr.source.push(char); + frame.source.push(val); } else { - ptr.source[(ptr.source.length - 1) || 0] += char; + frame.source[(frame.source.length - 1) || 0] += val; } } - const stepBack = () => { - if (ptr.state.op) { - pop(ptr.subst); - } else { - pop(ptr.raw); - } - } - - const open = (op: Expand.Op, terminal: Expand.Terminal) => { - stepBack(); - ptr.state.header = null; - ptr.state.detecting = null; - const existing = ptr.state.op; - if (existing) { - y++; - stack[x][y] = newState(); - ptr = stack[x][y]; - ptr.raw = []; + const open = (char: string, op: Expand.Op, terminal: Expand.Terminal) => { + const { escape, escaped } = frame; + if (escape) { + frame.escape = null; + frame.escaped = ''; + const directive = escaped.slice(1); + if (!directive){ + throw new Error('explicit interpolate without 2 char prefix directive'); + } + append(directive); + if (char) { + append(char); + } } - ptr.state.op = op; - ptr.state.terminal = terminal; + frame.header = null; + frame.detecting = null; + y++; + stack[y] = newState({ op: escape ? 
'n' : op, terminal }); + frame = stack[y]; } const sub = (fn: (s: string, location: any) => any, str: string, sourceMap?: number[]) => { @@ -85,106 +82,130 @@ export function tokenize(str: string, options: Expand.Options) { } const close = () => { - const op = ptr.state.op; - ptr.state.sourceMap = [offset, i + (ptr.state.terminal && ptr.state.terminal.length) - offset]; - ptr.state.op = null; - ptr.state.terminal = null; + const { op, terminal, escape, escaped } = frame; + frame.sourceMap = [offset, i + (terminal && terminal.length) - offset]; + frame.op = null; + frame.terminal = null; + if (escape && escaped) { + frame.escape = false; + frame.escaped = ''; + append(escaped); + } + const swap = reduce(frame.out, frame.source); + let out: any; let res; - const swap = reduce(ptr.subst, ptr.source); if (check(swap, [Object, Array])) { - res = call(swap); + out = call(swap); } else { if (op == 'v') { - res = sub(dereference, swap, ptr.state.sourceMap); + out = sub(dereference, swap, frame.sourceMap); } else if (op == 's') { - res = sub(shell, swap, ptr.state.sourceMap); + out = sub(shell, swap, frame.sourceMap); } else if (op == 'f') { - res = sub(fetch, swap, ptr.state.sourceMap); + out = sub(fetch, swap, frame.sourceMap); } else if (op == 'e') { - const deref = (str: string) => subSync(dereference, str, ptr.state.sourceMap) - res = sub((s) => expression(deref, check).parse(s), swap, ptr.state.sourceMap) + const deref = (str: string) => subSync(dereferenceSync, str, frame.sourceMap) + out = sub((s) => expression(deref, check).parse(s), swap, frame.sourceMap) + } else if (op == 'n') { + if (terminal != '__null__') append(terminal); + out = reduce(frame.out, frame.source); } } - if (y > 0) { - delete stack[x][y]; - y--; - ptr = stack[x][y]; - ptr.subst.push(res); - } - else { - if (res) { ptr.state.dirty = true }; - ptr.raw.push(res); - x++; - y = 0; - stack[x] = [newState()]; - ptr = stack[x][y]; - } + // if (out) frame.dirty = true; + // const { out } = frame; + // const out = reduce(frame.out, frame.source); + // delete stack[y]; + stack.length-- + y--; + frame = stack[y]; + append(out); + // frame.source.push(out); } for (i = 0; i != template.length; i++) { const char = template[i]; - if (ptr.state.escape) { - ptr.state.escape = false; - append(char); - } else { - const { detecting, header, op, terminal } = ptr.state; - switch (char) { - case '(': - if (detecting) { - open('s', ')'); - break; - } else { - append(char); - } + const { detecting, header, op, terminal, escape, escaped } = frame; + switch (char) { + case '(': + if (detecting && (detecting == '$')) { + open(char, 's', ')'); break; - case '{': - if (detecting) { - open(detecting == '$' ? 'v' : detecting == '^' ? 'f' : 'e', '}'); - break; - } else { - append(char); - } - break; - case '}': case ')': - if (op && terminal == char) { - close(); - } else { - append(char); - } - break; - case ' ': - if (op && terminal == char) { - close(); - } + } else { append(char); + } + break; + case '{': + if (detecting) { + open(char, detecting == '$' ? 'v' : detecting == '^' ? 
'f' : 'e', '}'); break; - case '\\': - ptr.state.escape = true; - break; - default: - if (header) { - ptr.state.header = null; - if (header == '=') open('e', '__null__'); - else if (header == '^') open('f', '__null__'); - else if (header == '$') open('v', ' '); - } else if (char == '=' || char == '$' || char == '^') { - if (i < 1) ptr.state.header = char; - ptr.state.detecting = char; - } + } else { + append(char); + } + break; + case '}': case ')': + if (op && terminal == char) { + close(); + } else { + append(char); + } + break; + case ' ': + if (op && terminal == char) { + close(); + } + append(char); + break; + case '\\': + if (frame.escape) { + append(char); + } else { + frame.escape = true; + frame.escaped = '\\'; + } + break; + case '=': case '$': case '^': + if (detecting) { + append(detecting); + } + if (!frame.out[0] || (frame.out[0].length < 2)) { + frame.header = char; + } + if (frame.escape) { + append(char); + } + frame.detecting = char; + break; + default: + if (header) { + frame.header = null; + if (header == '=') open(null, 'e', '__null__'); + else if (header == '^') open(null, 'f', '__null__'); + else if (header == '$') open(null, 'v', '__null__'); + } else { if (detecting) { - ptr.state.detecting = null; + frame.detecting = null; + append(detecting); } - append(char); - break; - } + if (escape) { + frame.escape = false; + frame.escaped = null; + // console.log('append escaped', escaped) + append(escaped); + } + } + append(char); + break; } } - while (ptr.state.op) { - if (ptr.state.terminal == '}') throw { message: `expected a closing ${ptr.state.terminal}` } + if (frame.detecting) { + append(frame.detecting); + } + while (frame.op) { + if (frame.terminal == '}') throw { message: `expected a closing ${frame.terminal}` } close(); } - if (ptr.state.detecting) { - ptr.state.detecting = null; + if (frame.detecting) { + frame.detecting = null; } return stack; }; diff --git a/src/interpolate/expression.pegjs b/src/parsers/expression.pegjs similarity index 100% rename from src/interpolate/expression.pegjs rename to src/parsers/expression.pegjs diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne new file mode 100644 index 0000000..0fd310f --- /dev/null +++ b/src/parsers/moss.ne @@ -0,0 +1,403 @@ +@{% + + +// implementation + +function* indented(lexer, source, info) { + let iter = peekable(lexer.reset(source, info)) + let stack = [] + + // absorb initial blank lines and indentation + let indent = iter.nextIndent() + + for (let tok; tok = iter.next(); ) { + if (tok.type === 'nl') { + const newIndent = iter.nextIndent() + if (newIndent == null) break // eof + else if (newIndent === indent) { + yield {type: 'nl'} + + } else if (newIndent > indent) { + stack.push(indent) + indent = newIndent + yield {type: 'indent', value: indent} + + } else { + while (newIndent < indent) { + indent = stack.pop() + yield {type: 'dedent', value: indent} + } + if (newIndent !== indent) { + throw new Error('inconsistent indentation') + } + } + indent = newIndent + } else { + yield tok + } + } + + // dedent remaining blocks at eof + for (let i = stack.length; i--;) { + yield {type: 'dedent'} + } + + yield Object.assign({ + type: 'eof' + }, { + toString() { return this.value }, + offset: lexer.index, + size: 0, + lineBreaks: 0, + line: lexer.line, + col: lexer.col, + }) +} + +function peekable(lexer) { + let here = lexer.next() + return { + next() { + const old = here + here = lexer.next() + return old + }, + peek() { + return here + }, + nextIndent() { + for (let tok; tok = this.peek(); ) { + if 
(tok.type === 'nl') { + this.next(); + } + else if (tok.type === 'space') { + const indent = tok.value.length + this.next() + + const next = this.peek() + if (!next) return + if (next.type === 'nl') { + this.next() + continue + } + return indent + } + return 0 + } + }, + } +} + + +function StreamLexer() { + this.lexer = moo.compile(rules); +} + +StreamLexer.prototype.next = function() { + const { value } = this.generator.next(); + if (value){ + return value; + } +} + +StreamLexer.prototype.save = function() { +} + +StreamLexer.prototype.getTokenTypes = function(source) { + const types = []; + const iter = indented(this.lexer, source); + const arr = []; + for (const t of iter){ + arr.push(t); + } + return arr.map(t => { + switch (t.type){ + case "char": return t.value; + case "indent": return "->"; + case "dedent": return "<-"; + case "nl": return "\n"; + case "eof": return "eof"; + case "space": return " "; + } + }) +} + +StreamLexer.prototype.reset = function(source, info) { + console.log('types', this.getTokenTypes(source)) + this.generator = indented(this.lexer, source, info); + this.initialized = true; +} + +StreamLexer.prototype.formatError = function(token) { + return this.lexer.formatError(token); +} + +StreamLexer.prototype.has = function(name) { + if (name == 'indent') return true; + if (name == 'dedent') return true; + if (name == 'eof') return true; + return this.lexer.has(name); +} + +const rules = { + nl: {match: /[\n\r]+/, lineBreaks: true }, + space: /[ ]+/, + char: /./ +}; + +const lexer = new StreamLexer(); + +%} + + +@lexer lexer + +root -> scope {% function(d) { + return d[0]; +} %} + +scope -> + map {% id %} + +map -> (mapEntry):+ eol {% function(d) { + let map = new Map(); + const entries = d[0]; + for (const mapEntry of entries) { + const [key, valuePair] = mapEntry[0]; + const value = valuePair ? valuePair[0] : null; + if(key) { + if (map.get(key)){ + throw new Error(`duplicate key ${key}`); + } + map.set(key, value) + } + } + return map; +} %} + +mapEntry + -> mapKey __ {% function(d) { + return d[0]; + } %} + | mapKey mapClass {% function(d) { + const [key, mapClass] = d; + console.log('parse map val', key[0], mapClass); + const pair = [key[0], mapClass]; + return pair; + } %} + + +eol -> comment:? _ (%nl | %eof) + +mapKey -> label | number + +mapClass -> + separator __ mapKey {% function(d) { return d[2]; } %} + + #| separator _ mapDescriptor {% function(d) { return d[2][0]; } %} + +mapDescriptor -> + _ directive "text" _ %indent nestedMultilineString {% function(d) { return d[4]; } %} + | _ %indent nestedMap {% function(d) { return d[2]; } %} + + +nestedMap -> scope %dedent {% function(d) { return d[1]; } %} +nestedMultilineString -> multilineString %dedent {% function(d) { return d[1]; } %} +nestedValue -> + nonStringLike + | stringLike {% id %} + | jsonRoot + | jsonArray + +jsonRoot -> + jsObject + | "{" _ jsonPair (_ jsonPair):+ _ "}" {% missingComma %} + +jsObject -> "{" ((nl | indent | null) jsonPair ",":?):* dedent:? 
"}" {% function map(d) { + let output = {}; + for (let i in d[1]) { + const pair = d[1][i]; + const key = pair[1][0]; + const value = pair[1][1]; + if(key) { + if (output[key]){ + throw new Error(`duplicate key ${key}`); + } + output[key] = value[0]; + } + } + return output; +} %} + +jsonPair -> _ dqString _ separator _ dqString _ {% function(d) { return [d[1], d[5]]; } %} + +jsonArray -> "[" _ "]" {% function(d) { return []; } %} + | "[" _ jsonRoot (_ "," _ jsonRoot):* _ "]" {% extractArray %} + +# MultiLine String + +multilineString -> (lineBreak _ multilineEntry):* %dedent {% function(d) { + const indent = d[0][0][0][0]; + + const lines = d[2].map(segment => { + const relativeIndent = segment[0] - indent; + let base = ''; + if (relativeIndent > 0){ + for (let i = 0; i < relativeIndent; i++){ + base = base + ' '; + } + } + return base + segment[1]; + }).join('\\n'); + return lines; +} %} + +multilineEntry + -> stringLike _ comment:? {% function(d) { return d[0]; } %} + | stringLike __ (stringLike | nonStringLike | __):+ comment:? + {% function(d) { + const head = d[0][0]; + const tail = reduce(d[2]); + const stringLine = reduceN(head, d[1], tail); + console.log({stringLine}); + return stringLine; + } %} + +stringLike + -> label {% id %} + | symbol {% id %} + | escapedString {% id %} + +nonStringLike -> + number {% id %} + | "true" {% function(d) { return true; } %} + | "false" {% function(d) { return false; } %} + | "null" {% function(d) { return null; } %} + +lineBreak -> nl | indent | dedent +indent -> %indent {% function(d) { return d[0].value } %} +dedent -> %dedent {% function(d) { return d[0].value } %} +nl -> %nl {% function(d) { } %} + +# Numbers + +number -> _number {% function(d) {return parseFloat(d[0])} %} + +_posint -> + [0-9] {% id %} + | _posint [0-9] {% function(d) {return d[0] + d[1]} %} + +_int -> + "-" _posint {% function(d) {return d[0] + d[1]; }%} + | _posint {% id %} + +_float -> + _int {% id %} + | _int "." _posint {% function(d) {return d[0] + d[1] + d[2]; }%} + +_number -> + _float {% id %} + | _float "e" _int {% function(d){return d[0] + d[1] + d[2]; } %} + + +#Strings + +dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} +escapedString -> "`" _string "`" {% function(d) {return d[1]; } %} + +_string -> + null {% function() {return ""; } %} + | _string _stringchar {% function(d) {return d[0] + d[1];} %} + +_stringchar -> + [^\\"] {% id %} + | "\\" [^] {% function(d) {return JSON.parse("\"" + d[0] + d[1] + "\""); } %} + +label -> _label {% function(d) {return d[0]} %} + +_label -> + [a-zA-Z<$] _labelChar:* {% function(d) { + const label = d[0] + (d[1] ? d[1].join('') : '') + return label + } %} + +_labelChar -> + [a-zA-Z0-9<>$] {% function(d) { return d[0] } %} + +# Simple + +cssLabel -> selector (label ",":? 
__:?):* {% function(d) { return [d[0], d[1]] } %} + +symbol -> + directive + | selector + | separator {% function(d) { } %} + +selector -> "=" {% function(d) { } %} +directive -> "@" {% function(d) { } %} +separator -> ":" {% function(d) { } %} + +# Whitespace +_ -> null | _ %space {% function() {} %} +__ -> %space | __ %space {% function() { return d[0]} %} + +comment -> "#" _ [\.*] {% function(d) { return null; } %} + +@{% +// errors + +function missingComma(){ + throw new Error("missing comma"); +} + +function addPairToMap(pair, map){ + const [key, value] = pair; + if(key) { + if (map[key]){ + throw new Error(`duplicate key ${key}`); + } + map.set(key, value); + } +} + +function join(list, rhs){ + if (!list) return rhs; + if (typeof list == 'string'){ + return list + rhs; + } + return list + rhs; +} + +function reduceN(...list){ + if (list.length == 1){ + return list[0]; + } + let memo; + for (const item of list){ + memo = join(memo, item); + } + return memo; +} + +function reduce(list){ + return reduceN(...list); +} + +function extractObject(d) { + const map = new Map(); + addPairToMap(d[2], output); + for (let i in d[3]) { + addPairToMap(d[3][i][3], output); + } + return map; +} + +function extractArray(d) { + let array = [d[2]]; + for (let i in d[3]) { + array.push(d[3][i][3]); + } + return array; +} + +%} \ No newline at end of file diff --git a/src/schema.ts b/src/schema.ts new file mode 100644 index 0000000..3509f42 --- /dev/null +++ b/src/schema.ts @@ -0,0 +1,59 @@ +import { check, all, every, okmap, endsWith } from 'typed-json-transform'; + +interface ValidateState { + data?: any + path?: string +} + + +export function validate(description: Moss.Schema.Options, state: ValidateState) { + +} + +export function parseDescription(description: Moss.Schema.Description): Moss.Schema.Options { + if (check(description, String)) { + return { type: description as string } + } + if (check(description, Array)) { + const descriptionArray = description as Moss.Schema.Description[]; + // if (descriptionArray.length == 1) { + // return { + // type: "array", + // items: parseDescription(descriptionArray[0]), + // } + // } + // if (descriptionArray.length == 2 && (descriptionArray[0] == 'map')) { + // return { + // isMap: true, + // singleType: parseDescription(descriptionArray[1]), + // } + // } + return { + type: "array", + // isArray: true, + items: descriptionArray.map(d => parseDescription(d)), + } + } + if (check(description, Object)) { + const options: Moss.Schema.Options = description as any; + if (!options.type) { + return okmap(options, (d: Moss.Schema.Description, key: string) => { + return parseDescription(d) + }); + } + return options; + } +} + +export function validateArray(schema: Moss.Schema.Options, { data, path }: ValidateState) { + +} + +export function validateObject(current: any, { data, path }: ValidateState) { + +} + +export function validateScalar(current: any, { data, path }: ValidateState) { + +} + diff --git a/src/sync.ts b/src/sync.ts index bff2c44..b3c208a 100644 --- a/src/sync.ts +++ b/src/sync.ts @@ -1,15 +1,16 @@ /// import { - map as map, okmap as okmap, arrayify, extend, + map as map, okmap as okmapSync, okmap, arrayify, extend, check, clone, each, setValueForKeyPath, sum, valueForKeyPath, - all, isEqual, mergeArray, mergeOrReturnAssignment + all, isEqual, mergeArray, mergeOrReturnAssignment, contains } from 'typed-json-transform'; -import { interpolate as __interpolate } from './interpolate'; +import { interpolate as __interpolate, reservedKeys } from 
'./interpolate'; import { cascade as _cascade, shouldConstruct, select, parseSelectors } from './cascade'; import * as yaml from 'js-yaml'; import { getBranchSync as getBranch } from './resolvers'; +import { parseDescription } from './schema'; import { newLayer, @@ -73,14 +74,16 @@ export namespace Sync { } else { let val = source[_key]; if (_key[0] === '$') { - key = (interpolate(current, _key)).data; - } else if (_key[0] == '\\') { - key = key.slice(1); - } else if (_key.indexOf('.') != -1) { - const [first, ...kp] = _key.split('.') - key = first; - val = {}; - setValueForKeyPath(source[_key], kp.join('.'), val); + if (!contains(reservedKeys, _key)) { + key = (interpolate(current, _key)).data; + } else { + key = _key; + } + // } else if (_key.indexOf('.') != -1) { + // const [first, ...kp] = _key.split('.') + // key = first; + // val = {}; + // setValueForKeyPath(source[_key], kp.join('.'), val); } else { key = _key; } @@ -94,6 +97,9 @@ export namespace Sync { } currentErrorPath(state).path.pop(); } + if (current.state.schema) { + // check + } return current; } @@ -126,8 +132,6 @@ export namespace Sync { } catch (e) { handleError(e, layer, input) } } - - export const onMatch = (rv: Moss.ReturnValue, setter: any, operator: Merge.Operator, key: string) => { let { state, data: lhs } = rv; currentErrorPath(state).path.push(key); @@ -214,6 +218,12 @@ export namespace Sync { $: (current: Moss.ReturnValue, args: any) => { parseNextStructure(current, args); }, + schema: (current: Moss.ReturnValue, args: any) => { + const description = continueWithNewFrame(current, args); + current.state.schema = parseDescription(description.data); + console.log(current.state.schema); + current.data = current.state.schema; + }, extend: (parent: Moss.ReturnValue, args: any) => { const layer = continueWithNewFrame(parent, args); const { data } = layer; @@ -422,9 +432,13 @@ export namespace Sync { } }); + const jsonSchemaKeywords = ['id', 'schema', 'ref', 'comment']; + const mongoKeywords = ['set', 'unset', 'push', 'pull', 'gt', 'lt', 'gte', 'lte', 'exists']; + const keywords = okmapSync(jsonSchemaKeywords.concat(mongoKeywords), (key) => ({ key, value: key })); + function interpolate(layer: Moss.ReturnValue, input: any) { const { data, state } = layer; - const dictionary = { ...state.auto, stack: state.stack } + const dictionary = { ...state.auto, ...keywords, stack: state.stack } const res = _interpolate(layer, input, dictionary); return { data: res, state: layer.state } as Moss.ReturnValue; } @@ -479,18 +493,25 @@ export namespace Sync { path: sourceMap, rhs: true }); - const b = getBranch(uris, resolvers, layer); - if (!b) { + let resolvedBranch; + try { + resolvedBranch = getBranch(uris, resolvers, layer); + } catch (e) { + throw ({ + message: `Can't resolve ${uris}\n ${e.message}`, + }) + } + if (!resolvedBranch) { throw ({ message: `Can't resolve ${uris}\nNone of the available resolvers found a match.\n[${(map(resolvers, (r) => r.name)).filter(e => e).join(', ')}] `, }) } - if (b.data) { + if (resolvedBranch.data) { popAll++; - pushErrorPath(layer.state, { path: ['^' + b.path] }) - const res: Moss.ReturnValue = parseNextStructure(layer, b.data); + pushErrorPath(layer.state, { path: ['^' + resolvedBranch.path] }) + const res: Moss.ReturnValue = parseNextStructure(layer, resolvedBranch.data); const { data, state: { auto, stack, selectors, merge } } = res; - b.intermediate = { data, state: { auto, stack, selectors, merge } }; + resolvedBranch.intermediate = { data, state: { auto, stack, selectors, merge } }; 
return data; } }, From 2ba7618433dd8d6cc0dff9777ddba778fcfa7124 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Thu, 14 Mar 2019 13:47:16 -0700 Subject: [PATCH 02/12] wip --- src/parsers/moss.ne | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index 0fd310f..e20f3c5 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -159,12 +159,12 @@ root -> scope {% function(d) { scope -> map {% id %} -map -> (mapEntry):+ eol {% function(d) { +map -> (mapEntry):+ {% function(d) { let map = new Map(); const entries = d[0]; for (const mapEntry of entries) { const [key, valuePair] = mapEntry[0]; - const value = valuePair ? valuePair[0] : null; + const value = valuePair ? valuePair : null; if(key) { if (map.get(key)){ throw new Error(`duplicate key ${key}`); @@ -175,33 +175,34 @@ map -> (mapEntry):+ eol {% function(d) { return map; } %} + mapEntry - -> mapKey __ {% function(d) { + -> dataType endStatement {% function(d) { return d[0]; } %} - | mapKey mapClass {% function(d) { + | dataType mapClass {% function(d) { const [key, mapClass] = d; - console.log('parse map val', key[0], mapClass); + console.log('pair', key[0], mapClass); const pair = [key[0], mapClass]; return pair; } %} -eol -> comment:? _ (%nl | %eof) +pushScope -> separator %indent:? +popScope -> endStatement dedent:? %eof:? +endStatement -> ("," _) | __ | endLine +endLine -> comment:? _ (nl | %eof) -mapKey -> label | number +dataType -> label | number mapClass -> - separator __ mapKey {% function(d) { return d[2]; } %} - - #| separator _ mapDescriptor {% function(d) { return d[2][0]; } %} + pushScope __ mapEntry popScope {% function(d) { return d[2]; } %} + #| separator mapDescriptor {% function(d) { return d[1]; } %} mapDescriptor -> - _ directive "text" _ %indent nestedMultilineString {% function(d) { return d[4]; } %} - | _ %indent nestedMap {% function(d) { return d[2]; } %} - + pushScope scope popScope {% function(d) { return d[1]; } %} + | __ directive "text" _ %indent nestedMultilineString {% function(d) { return d[5]; } %} -nestedMap -> scope %dedent {% function(d) { return d[1]; } %} nestedMultilineString -> multilineString %dedent {% function(d) { return d[1]; } %} nestedValue -> nonStringLike @@ -277,7 +278,7 @@ nonStringLike -> lineBreak -> nl | indent | dedent indent -> %indent {% function(d) { return d[0].value } %} dedent -> %dedent {% function(d) { return d[0].value } %} -nl -> %nl {% function(d) { } %} +nl -> %nl {% function(d) { return null} %} # Numbers From 765e3b12ea989752611ff0f105877c58948cb052 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Fri, 15 Mar 2019 01:01:50 -0700 Subject: [PATCH 03/12] moss grammar --- src/parsers/moss.ne | 519 ++++++++++++++++++++++---------------------- 1 file changed, 257 insertions(+), 262 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index e20f3c5..c28daf4 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,7 +1,249 @@ +@lexer lexer + +root + -> scope eof + {% ([scope]) => scope %} + +scope + -> map {% id %} + +map + -> map mapPairConstructor + {% ([map, nextMatch]) => { + if (nextMatch) { + console.log('addPairToMap', nextMatch); + addPairToMap(nextMatch, map); + } + return map; + } %} + | mapPairConstructor + {% ([initialMatch]) => { + const map = new Map(); + mapId++; + if (initialMatch) { + console.log('newMap', mapId, initialMatch); + addPairToMap(initialMatch, map); + } + return map; + } %} + + +mapPairConstructor + # valid + -> 
dataValue endGroup + {% ([dv]) => { + console.log('label', dv); + return [dv, true] + }%} + + | dataValue separator __ dataValue endGroup + {% ([key, _1, _2, value]) => { + console.log('pair', [key, value]); + return [key, value] + } + %} + + | dataValue pushScope scope popScope + {% ([key, _1, scope]) => { + return [key, scope] + } %} + + | commentLine {% () => null %} + + # error cases + | dataValue nl indent scope popScope + {% expectedScopeOperator %} + +inlineContextDescription -> _ {% () => null %} + +# MultiLine String + +multilineString + -> ((nl | dedent) _string):* dedent + {% function(d) { + const indent = d[0][0][0][0]; + + const lines = d[2].map(segment => { + const relativeIndent = segment[0] - indent; + let base = ''; + if (relativeIndent > 0){ + for (let i = 0; i < relativeIndent; i++){ + base = base + ' '; + } + } + return base + segment[1]; + }).join('\\n'); + return lines; + } %} + +# HighLevel + +dataValue -> + label {% id %} + | number {% id %} + +# Numbers + +number + -> _number {% ([numberString]) => parseFloat(numberString) %} + +_number + -> _float {% id %} + | _float "e" _int {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + +_float + -> _int {% id %} + | _int "." _posint {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + +_int + -> "-" _posint {% ([lhs, rhs]) => lhs + rhs %} + | _posint {% id %} + +_posint + -> [0-9] {% id %} + | _posint [0-9] {% ([lhs, rhs]) => lhs + rhs %} + + +#Strings + +label + -> label [\w] {% ([l, r]) => { return l + r.value; }%} + | [a-zA-Z$_] {% ([char]) => char.value %} + +dqString + -> "\"" _string "\"" {% function(d) {return d[1]; } %} +escapedString + -> "`" _escapedString "`" {% function(d) {return d[1]; } %} + +_string + -> null {% function() {return ""; } %} + | _string _stringchar {% ([lhs, rhs]) => lhs + rhs %} +_stringchar + -> [^\\"] {% id %} + | "\\" [^] {% ([lhs, rhs]) => lhs + rhs %} + +_escapedString + -> null {% function() {return ""; } %} + | _escapedString _escapedStringChar {% ([lhs, rhs]) => lhs + rhs %} + +_escapedStringChar + -> [^] {% id %} + | "\\" [^] {% ([lhs, rhs]) => lhs + rhs %} + + +# Simple + +cssLabel -> + selector (label ",":? __:?):* {% function(d) { return [d[0], d[1]] } %} + +symbol + -> directive {% id %} + | selector {% id %} + | separator {% id %} + +selector + -> "=" {% () => '' %} +directive + -> "@" {% () => '' %} +separator + -> ":" {% () => '' %} + +# Formatting +pushScope + -> separator nl indent {% () => null %} + +popScope + -> comment:? dedent {% () => null %} + +indent + -> %indent {% () => null %} +dedent + -> %dedent {% () => null %} + +endGroup + -> "," | endLine {% () => null %} + +endLine + -> comment:? 
_ nl {% () => null %} + +commentLine + -> comment nl {% () => null %} +comment + -> "#" _escapedString {% () => null %} + +eof -> %eof + +nl + -> %nl {% () => null %} +_ + -> null | _ %space {% () => null %} +__ + -> %space | __ %space {% ([ws]) => ws %} + + @{% +// Errors +function missingComma(){ + throw new Error("missing comma"); +} + +function expectedScopeOperator(){ + throw new Error("nested scope without scope operator"); +} + +function missingRhs(){ + throw new Error("rhs of pair assignment missing"); +} + +function unknownOrEmpty(){ + throw new Error("unknown or empty"); +} + +// Value Reducers + +function addPairToMap([key, value], map){ + if (map.get(key)){ + throw new Error(`duplicate key ${key}`); + } + map.set(key, value); +} + +function join(list, rhs){ + if (!list) return rhs; + if (typeof list == 'string'){ + return list + rhs; + } + return list + rhs; +} + +function reduceN(...list){ + if (list.length == 1){ + return list[0]; + } + let memo; + for (const item of list){ + memo = join(memo, item); + } + return memo; +} -// implementation +function reduce(list){ + return reduceN(...list); +} + +function map2Object(map){ + const object = {}; + for (const pair of map){ + const [key] = pair; + object[key] = map.get(key); + } + return object; +} +%} + +@{% +// Lexer function* indented(lexer, source, info) { let iter = peekable(lexer.reset(source, info)) @@ -15,17 +257,19 @@ function* indented(lexer, source, info) { const newIndent = iter.nextIndent() if (newIndent == null) break // eof else if (newIndent === indent) { - yield {type: 'nl'} + yield {...tok, type: 'nl'} } else if (newIndent > indent) { stack.push(indent) indent = newIndent - yield {type: 'indent', value: indent} + yield {...tok, type: 'nl'} + yield {...tok, type: 'indent', value: indent} } else { while (newIndent < indent) { indent = stack.pop() - yield {type: 'dedent', value: indent} + yield {...tok, type: 'nl'} + yield {...tok, type: 'dedent', value: indent} } if (newIndent !== indent) { throw new Error('inconsistent indentation') @@ -39,7 +283,9 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { - yield {type: 'dedent'} + indent = stack.pop(); + yield {type: 'nl'} + yield {value: indent, type: 'dedent', text: '@dedent'} } yield Object.assign({ @@ -112,12 +358,13 @@ StreamLexer.prototype.getTokenTypes = function(source) { } return arr.map(t => { switch (t.type){ + case "nl": return "\n"; + case "space": return " "; case "char": return t.value; + case "indent": return "->"; case "dedent": return "<-"; - case "nl": return "\n"; case "eof": return "eof"; - case "space": return " "; } }) } @@ -126,7 +373,7 @@ StreamLexer.prototype.reset = function(source, info) { console.log('types', this.getTokenTypes(source)) this.generator = indented(this.lexer, source, info); this.initialized = true; -} +} StreamLexer.prototype.formatError = function(token) { return this.lexer.formatError(token); @@ -139,6 +386,8 @@ StreamLexer.prototype.has = function(name) { return this.lexer.has(name); } +let mapId = 0; + const rules = { nl: {match: /[\n\r]+/, lineBreaks: true }, space: /[ ]+/, @@ -147,258 +396,4 @@ const rules = { const lexer = new StreamLexer(); -%} - - -@lexer lexer - -root -> scope {% function(d) { - return d[0]; -} %} - -scope -> - map {% id %} - -map -> (mapEntry):+ {% function(d) { - let map = new Map(); - const entries = d[0]; - for (const mapEntry of entries) { - const [key, valuePair] = mapEntry[0]; - const value = valuePair ? 
valuePair : null; - if(key) { - if (map.get(key)){ - throw new Error(`duplicate key ${key}`); - } - map.set(key, value) - } - } - return map; -} %} - - -mapEntry - -> dataType endStatement {% function(d) { - return d[0]; - } %} - | dataType mapClass {% function(d) { - const [key, mapClass] = d; - console.log('pair', key[0], mapClass); - const pair = [key[0], mapClass]; - return pair; - } %} - - -pushScope -> separator %indent:? -popScope -> endStatement dedent:? %eof:? -endStatement -> ("," _) | __ | endLine -endLine -> comment:? _ (nl | %eof) - -dataType -> label | number - -mapClass -> - pushScope __ mapEntry popScope {% function(d) { return d[2]; } %} - #| separator mapDescriptor {% function(d) { return d[1]; } %} - -mapDescriptor -> - pushScope scope popScope {% function(d) { return d[1]; } %} - | __ directive "text" _ %indent nestedMultilineString {% function(d) { return d[5]; } %} - -nestedMultilineString -> multilineString %dedent {% function(d) { return d[1]; } %} -nestedValue -> - nonStringLike - | stringLike {% id %} - | jsonRoot - | jsonArray - -jsonRoot -> - jsObject - | "{" _ jsonPair (_ jsonPair):+ _ "}" {% missingComma %} - -jsObject -> "{" ((nl | indent | null) jsonPair ",":?):* dedent:? "}" {% function map(d) { - let output = {}; - for (let i in d[1]) { - const pair = d[1][i]; - const key = pair[1][0]; - const value = pair[1][1]; - if(key) { - if (output[key]){ - throw new Error(`duplicate key ${key}`); - } - output[key] = value[0]; - } - } - return output; -} %} - -jsonPair -> _ dqString _ separator _ dqString _ {% function(d) { return [d[1], d[5]]; } %} - -jsonArray -> "[" _ "]" {% function(d) { return []; } %} - | "[" _ jsonRoot (_ "," _ jsonRoot):* _ "]" {% extractArray %} - -# MultiLine String - -multilineString -> (lineBreak _ multilineEntry):* %dedent {% function(d) { - const indent = d[0][0][0][0]; - - const lines = d[2].map(segment => { - const relativeIndent = segment[0] - indent; - let base = ''; - if (relativeIndent > 0){ - for (let i = 0; i < relativeIndent; i++){ - base = base + ' '; - } - } - return base + segment[1]; - }).join('\\n'); - return lines; -} %} - -multilineEntry - -> stringLike _ comment:? {% function(d) { return d[0]; } %} - | stringLike __ (stringLike | nonStringLike | __):+ comment:? - {% function(d) { - const head = d[0][0]; - const tail = reduce(d[2]); - const stringLine = reduceN(head, d[1], tail); - console.log({stringLine}); - return stringLine; - } %} - -stringLike - -> label {% id %} - | symbol {% id %} - | escapedString {% id %} - -nonStringLike -> - number {% id %} - | "true" {% function(d) { return true; } %} - | "false" {% function(d) { return false; } %} - | "null" {% function(d) { return null; } %} - -lineBreak -> nl | indent | dedent -indent -> %indent {% function(d) { return d[0].value } %} -dedent -> %dedent {% function(d) { return d[0].value } %} -nl -> %nl {% function(d) { return null} %} - -# Numbers - -number -> _number {% function(d) {return parseFloat(d[0])} %} - -_posint -> - [0-9] {% id %} - | _posint [0-9] {% function(d) {return d[0] + d[1]} %} - -_int -> - "-" _posint {% function(d) {return d[0] + d[1]; }%} - | _posint {% id %} - -_float -> - _int {% id %} - | _int "." 
_posint {% function(d) {return d[0] + d[1] + d[2]; }%} - -_number -> - _float {% id %} - | _float "e" _int {% function(d){return d[0] + d[1] + d[2]; } %} - - -#Strings - -dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} -escapedString -> "`" _string "`" {% function(d) {return d[1]; } %} - -_string -> - null {% function() {return ""; } %} - | _string _stringchar {% function(d) {return d[0] + d[1];} %} - -_stringchar -> - [^\\"] {% id %} - | "\\" [^] {% function(d) {return JSON.parse("\"" + d[0] + d[1] + "\""); } %} - -label -> _label {% function(d) {return d[0]} %} - -_label -> - [a-zA-Z<$] _labelChar:* {% function(d) { - const label = d[0] + (d[1] ? d[1].join('') : '') - return label - } %} - -_labelChar -> - [a-zA-Z0-9<>$] {% function(d) { return d[0] } %} - -# Simple - -cssLabel -> selector (label ",":? __:?):* {% function(d) { return [d[0], d[1]] } %} - -symbol -> - directive - | selector - | separator {% function(d) { } %} - -selector -> "=" {% function(d) { } %} -directive -> "@" {% function(d) { } %} -separator -> ":" {% function(d) { } %} - -# Whitespace -_ -> null | _ %space {% function() {} %} -__ -> %space | __ %space {% function() { return d[0]} %} - -comment -> "#" _ [\.*] {% function(d) { return null; } %} - -@{% -// errors - -function missingComma(){ - throw new Error("missing comma"); -} - -function addPairToMap(pair, map){ - const [key, value] = pair; - if(key) { - if (map[key]){ - throw new Error(`duplicate key ${key}`); - } - map.set(key, value); - } -} - -function join(list, rhs){ - if (!list) return rhs; - if (typeof list == 'string'){ - return list + rhs; - } - return list + rhs; -} - -function reduceN(...list){ - if (list.length == 1){ - return list[0]; - } - let memo; - for (const item of list){ - memo = join(memo, item); - } - return memo; -} - -function reduce(list){ - return reduceN(...list); -} - -function extractObject(d) { - const map = new Map(); - addPairToMap(d[2], output); - for (let i in d[3]) { - addPairToMap(d[3][i][3], output); - } - return map; -} - -function extractArray(d) { - let array = [d[2]]; - for (let i in d[3]) { - array.push(d[3][i][3]); - } - return array; -} - %} \ No newline at end of file From 4f4bbda767d73cb09d898d8b5df58391629e4c41 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Fri, 15 Mar 2019 04:05:01 -0700 Subject: [PATCH 04/12] moss grammar --- src/parsers/moss.ne | 186 +++++++++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 70 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index c28daf4..a31e6d9 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,17 +1,17 @@ @lexer lexer root - -> scope eof + -> scope nl:* {% ([scope]) => scope %} scope -> map {% id %} - + map - -> map mapPairConstructor - {% ([map, nextMatch]) => { + -> map _ mapPairConstructor + {% ([map, ws, nextMatch]) => { if (nextMatch) { - console.log('addPairToMap', nextMatch); + //console.log('addPairToMap', nextMatch); addPairToMap(nextMatch, map); } return map; @@ -21,7 +21,7 @@ map const map = new Map(); mapId++; if (initialMatch) { - console.log('newMap', mapId, initialMatch); + //console.log('newMap', mapId, initialMatch); addPairToMap(initialMatch, map); } return map; @@ -29,32 +29,84 @@ map mapPairConstructor - # valid - -> dataValue endGroup - {% ([dv]) => { - console.log('label', dv); - return [dv, true] - }%} + # nested constrained scope + -> label scopeOperator __ ((constraintMap (__ | pushScope)) | pushScope) scope popScope + {% ([key, _1, _2, scopeConstaints, scope]) => { + 
console.log('scopeConstaints', scopeConstaints[0]) + return [key, scope] + } %} - | dataValue separator __ dataValue endGroup - {% ([key, _1, _2, value]) => { - console.log('pair', [key, value]); - return [key, value] - } - %} - | dataValue pushScope scope popScope + # nested scope + | label scopeOperator pushScope scope popScope {% ([key, _1, scope]) => { - return [key, scope] + return [key, scope] } %} + + # map pair, optionally constrained + | label scopeOperator __ (constraintMap __):? label endGroup + {% ([key, _1, _2, constraintMap, value]) => { + //console.log('pair', [key, value]); + return [key, value] + } + %} + + # default simple value + | label endGroup + {% ([dv]) => { + //console.log('label', dv); + return [dv, true] + }%} | commentLine {% () => null %} - + # error cases - | dataValue nl indent scope popScope + | label pushScope scope popScope {% expectedScopeOperator %} -inlineContextDescription -> _ {% () => null %} + +constraintMap + -> constraintMap __ constraint + {% ([map, ws, nextMatch]) => { + if (nextMatch) { + addPairToMap(nextMatch, map); + } + return map; + } %} + | constraint + {% ([initialMatch]) => { + const map = new Map(); + mapId++; + if (initialMatch) { + //console.log('add prop', initialMatch) + addPairToMap(initialMatch, map); + } + return map; + } %} + +constraint + -> "@" label "[" ((scope "]") | (inlineScope "]")) + {% ([_0, property, _2, scopeSelector]) => { + return [property, scopeSelector[0][0][2]] + }%} + | "@" label + {% ([_0, property]) => [property, true] %} + +inlineScope + -> nl indent scope dedent + +list + -> list "," _ label + {% ([list, _1, _2, item]) => { + if (item) { + list.push(item); + } + return list; + } %} + | label + {% ([value]) => { + return [value]; + } %} # MultiLine String @@ -78,10 +130,12 @@ multilineString # HighLevel -dataValue -> - label {% id %} +label -> + word {% id %} + | escapedString {% id %} + | dqString {% id %} | number {% id %} - + # Numbers number @@ -100,22 +154,23 @@ _int | _posint {% id %} _posint - -> [0-9] {% id %} - | _posint [0-9] {% ([lhs, rhs]) => lhs + rhs %} + -> %digitChar {% id %} + | _posint %digitChar {% ([lhs, rhs]) => lhs + rhs %} #Strings - -label - -> label [\w] {% ([l, r]) => { return l + r.value; }%} - | [a-zA-Z$_] {% ([char]) => char.value %} +word + -> word (%wordChar | %digitChar) + {% ([l, r]) => { return l + r; }%} + | %wordChar {% ([char]) => char.value %} + dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} escapedString -> "`" _escapedString "`" {% function(d) {return d[1]; } %} -_string +_string -> null {% function() {return ""; } %} | _string _stringchar {% ([lhs, rhs]) => lhs + rhs %} _stringchar @@ -124,44 +179,39 @@ _stringchar _escapedString -> null {% function() {return ""; } %} - | _escapedString _escapedStringChar {% ([lhs, rhs]) => lhs + rhs %} - -_escapedStringChar - -> [^] {% id %} - | "\\" [^] {% ([lhs, rhs]) => lhs + rhs %} - + | _escapedString (%wordChar | %digitChar | %anyChar | %space) {% ([lhs, rhs]) => lhs + rhs %} # Simple -cssLabel -> - selector (label ",":? __:?):* {% function(d) { return [d[0], d[1]] } %} +cssword -> + selector (word ",":? 
__:?):* {% function(d) { return [d[0], d[1]] } %} symbol -> directive {% id %} | selector {% id %} - | separator {% id %} + | scopeOperator {% id %} selector - -> "=" {% () => '' %} + -> "=" {% () => '=' %} directive - -> "@" {% () => '' %} -separator - -> ":" {% () => '' %} + -> "@" {% () => '@' %} +scopeOperator + -> ":" {% () => ':' %} # Formatting pushScope - -> separator nl indent {% () => null %} - + -> nl indent {% () => null %} + popScope -> comment:? dedent {% () => null %} - + indent -> %indent {% () => null %} dedent -> %dedent {% () => null %} endGroup - -> "," | endLine {% () => null %} + -> (",") | endLine {% () => null %} endLine -> comment:? _ nl {% () => null %} @@ -263,18 +313,20 @@ function* indented(lexer, source, info) { stack.push(indent) indent = newIndent yield {...tok, type: 'nl'} - yield {...tok, type: 'indent', value: indent} + yield {...tok, type: 'indent', value: 'indent', indent: indent} - } else { + } else if (newIndent < indent){ while (newIndent < indent) { indent = stack.pop() yield {...tok, type: 'nl'} - yield {...tok, type: 'dedent', value: indent} + yield {...tok, type: 'dedent', value: null, indent: indent} } if (newIndent !== indent) { throw new Error('inconsistent indentation') } - } + } else { + yield {...tok, type: 'nl'} + } indent = newIndent } else { yield tok @@ -284,20 +336,12 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { indent = stack.pop(); - yield {type: 'nl'} - yield {value: indent, type: 'dedent', text: '@dedent'} + yield {type: 'nl', value: '' } + yield {type: 'dedent', value: '' } } - - yield Object.assign({ - type: 'eof' - }, { - toString() { return this.value }, - offset: lexer.index, - size: 0, - lineBreaks: 0, - line: lexer.line, - col: lexer.col, - }) + + yield { type: 'nl', value: '' } + } function peekable(lexer) { @@ -360,11 +404,10 @@ StreamLexer.prototype.getTokenTypes = function(source) { switch (t.type){ case "nl": return "\n"; case "space": return " "; - case "char": return t.value; - case "indent": return "->"; case "dedent": return "<-"; case "eof": return "eof"; + default: return t.text; } }) } @@ -391,7 +434,10 @@ let mapId = 0; const rules = { nl: {match: /[\n\r]+/, lineBreaks: true }, space: /[ ]+/, - char: /./ + operator: /[+\-*?|\/]/, + wordChar: /[a-zA-Z$_]/, + digitChar: /[0-9]/, + anyChar: /./, }; const lexer = new StreamLexer(); From 30473986d078350f7074c8d8bc2caf1196041b75 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Sat, 16 Mar 2019 03:00:00 -0700 Subject: [PATCH 05/12] wip expressions --- src/parsers/moss.ne | 346 ++++++++++++++++++++++---------------------- 1 file changed, 174 insertions(+), 172 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index a31e6d9..5316dff 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,17 +1,13 @@ @lexer lexer - -root - -> scope nl:* - {% ([scope]) => scope %} -scope - -> map {% id %} - +scope + -> map nl:* {% id %} + map - -> map _ mapPairConstructor - {% ([map, ws, nextMatch]) => { + -> map mapPairConstructor + {% ([_map, nextMatch]) => { + const map = new Map(_map); if (nextMatch) { - //console.log('addPairToMap', nextMatch); addPairToMap(nextMatch, map); } return map; @@ -19,15 +15,12 @@ map | mapPairConstructor {% ([initialMatch]) => { const map = new Map(); - mapId++; if (initialMatch) { - //console.log('newMap', mapId, initialMatch); addPairToMap(initialMatch, map); } return map; } %} - mapPairConstructor # nested constrained scope -> label scopeOperator __ 
((constraintMap (__ | pushScope)) | pushScope) scope popScope @@ -38,30 +31,31 @@ mapPairConstructor # nested scope - | label scopeOperator pushScope scope popScope - {% ([key, _1, scope]) => { + | label scopeOperator pushScope scope popScope + {% ([key, _1, _2, scope]) => { return [key, scope] } %} - + # map pair, optionally constrained - | label scopeOperator __ (constraintMap __):? label endGroup + | label scopeOperator __ (constraintMap __):? expression endGroup {% ([key, _1, _2, constraintMap, value]) => { - //console.log('pair', [key, value]); + console.log('pair', [key, value]); return [key, value] } %} # default simple value - | label endGroup - {% ([dv]) => { - //console.log('label', dv); - return [dv, true] + | expression endGroup + {% ([value]) => { + return [value, true] }%} - | commentLine {% () => null %} + | commentLine {% ([comment]) => { + return ['comment', comment] + }%} # error cases - | label pushScope scope popScope + | label pushScope scope {% expectedScopeOperator %} @@ -76,7 +70,6 @@ constraintMap | constraint {% ([initialMatch]) => { const map = new Map(); - mapId++; if (initialMatch) { //console.log('add prop', initialMatch) addPairToMap(initialMatch, map); @@ -85,15 +78,23 @@ constraintMap } %} constraint - -> "@" label "[" ((scope "]") | (inlineScope "]")) + -> "@" label "[" scope "]" + {% ([_0, property, _2, scopeSelector]) => { + return [property, scopeSelector[2]] + }%} + | "@" "[" (scope | inlineScope) "]" + {% ([_0, _1, scopeSelector]) => { + return scopeSelector[0][2] + }%} + | "@[" _ "]" {% ([_0, property, _2, scopeSelector]) => { - return [property, scopeSelector[0][0][2]] + return [property, true] }%} | "@" label {% ([_0, property]) => [property, true] %} inlineScope - -> nl indent scope dedent + -> pushScope scope popScope list -> list "," _ label @@ -130,46 +131,60 @@ multilineString # HighLevel -label -> - word {% id %} - | escapedString {% id %} - | dqString {% id %} - | number {% id %} +# Maths + +expression + -> add {% id %} + +add + -> add _ ("+"|"-") _ multiply {% (d) => {console.log(d); return d.join('')} %} + | multiply {% id %} + +multiply + -> multiply _ ("*"|"/") _ term {% (d) => d.join('') %} + | term {% id %} + +group + -> "(" expression ")" {% (d) => d.join('') %} + +term + -> group {% id %} + | label {% id %} # Numbers number - -> _number {% ([numberString]) => parseFloat(numberString) %} + -> _number {% ([n]) => parseFloat(n) %} _number - -> _float {% id %} - | _float "e" _int {% ([lhs, operator, rhs]) => lhs + operator + rhs %} - -_float - -> _int {% id %} - | _int "." _posint {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + -> _float "e" _int {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + | _float {% id %} + +_float + -> _int "." 
%number {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + | _int {% id %} _int - -> "-" _posint {% ([lhs, rhs]) => lhs + rhs %} - | _posint {% id %} - -_posint - -> %digitChar {% id %} - | _posint %digitChar {% ([lhs, rhs]) => lhs + rhs %} - + -> "-" %number {% ([lhs, rhs]) => lhs + rhs %} + | %number {% ([n]) => n %} + +# Words +label -> + word {% id %} + | escapedString {% id %} + | dqString {% id %} + | number {% id %} #Strings - -word - -> word (%wordChar | %digitChar) - {% ([l, r]) => { return l + r; }%} - | %wordChar {% ([char]) => char.value %} + +word -> %word {% ([n]) => n.value || "" %} dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} + escapedString -> "`" _escapedString "`" {% function(d) {return d[1]; } %} - + _string -> null {% function() {return ""; } %} | _string _stringchar {% ([lhs, rhs]) => lhs + rhs %} @@ -178,32 +193,20 @@ _stringchar | "\\" [^] {% ([lhs, rhs]) => lhs + rhs %} _escapedString - -> null {% function() {return ""; } %} - | _escapedString (%wordChar | %digitChar | %anyChar | %space) {% ([lhs, rhs]) => lhs + rhs %} - -# Simple - -cssword -> - selector (word ",":? __:?):* {% function(d) { return [d[0], d[1]] } %} - -symbol - -> directive {% id %} - | selector {% id %} - | scopeOperator {% id %} + -> _escapedString (%word | %number | %space) {% ([lhs, rhs]) => lhs + rhs[0] %} -selector - -> "=" {% () => '=' %} directive -> "@" {% () => '@' %} + scopeOperator -> ":" {% () => ':' %} # Formatting pushScope - -> nl indent {% () => null %} + -> nl indent _ {% () => null %} popScope - -> comment:? dedent {% () => null %} + -> dedent {% () => null %} indent -> %indent {% () => null %} @@ -211,86 +214,27 @@ dedent -> %dedent {% () => null %} endGroup - -> (",") | endLine {% () => null %} + -> _ (("," _) | endLine) {% () => null %} endLine - -> comment:? _ nl {% () => null %} + -> comment:? nl {% () => null %} commentLine - -> comment nl {% () => null %} -comment - -> "#" _escapedString {% () => null %} - -eof -> %eof - -nl - -> %nl {% () => null %} -_ - -> null | _ %space {% () => null %} -__ - -> %space | __ %space {% ([ws]) => ws %} - + -> comment nl {% ([comment]) => (comment) %} -@{% -// Errors - -function missingComma(){ - throw new Error("missing comma"); -} - -function expectedScopeOperator(){ - throw new Error("nested scope without scope operator"); -} - -function missingRhs(){ - throw new Error("rhs of pair assignment missing"); -} - -function unknownOrEmpty(){ - throw new Error("unknown or empty"); -} - -// Value Reducers - -function addPairToMap([key, value], map){ - if (map.get(key)){ - throw new Error(`duplicate key ${key}`); - } - map.set(key, value); -} - -function join(list, rhs){ - if (!list) return rhs; - if (typeof list == 'string'){ - return list + rhs; - } - return list + rhs; -} +comment + -> "#" _escapedString {% ([_, comment]) => (comment) %} -function reduceN(...list){ - if (list.length == 1){ - return list[0]; - } - let memo; - for (const item of list){ - memo = join(memo, item); - } - return memo; -} +nl -> %nl {% id %} +_ + -> _ space {% ([e]) => { + return e ? 
e + ' ': ''; + } %} + | null -function reduce(list){ - return reduceN(...list); -} +__ -> space {% id %} -function map2Object(map){ - const object = {}; - for (const pair of map){ - const [key] = pair; - object[key] = map.get(key); - } - return object; -} -%} +space -> %space {% ([d]) => d.value %} @{% // Lexer @@ -308,18 +252,17 @@ function* indented(lexer, source, info) { if (newIndent == null) break // eof else if (newIndent === indent) { yield {...tok, type: 'nl'} - } else if (newIndent > indent) { stack.push(indent) indent = newIndent yield {...tok, type: 'nl'} - yield {...tok, type: 'indent', value: 'indent', indent: indent} + yield {...tok, type: 'indent', indent: indent} } else if (newIndent < indent){ while (newIndent < indent) { indent = stack.pop() yield {...tok, type: 'nl'} - yield {...tok, type: 'dedent', value: null, indent: indent} + yield {...tok, type: 'dedent', indent: indent} } if (newIndent !== indent) { throw new Error('inconsistent indentation') @@ -329,7 +272,7 @@ function* indented(lexer, source, info) { } indent = newIndent } else { - yield tok + yield { ...tok, indent: indent} } } @@ -339,9 +282,8 @@ function* indented(lexer, source, info) { yield {type: 'nl', value: '' } yield {type: 'dedent', value: '' } } - - yield { type: 'nl', value: '' } - + + yield {type: 'nl', value: '' } } function peekable(lexer) { @@ -362,15 +304,22 @@ function peekable(lexer) { } else if (tok.type === 'space') { const indent = tok.value.length - this.next() - - const next = this.peek() - if (!next) return - if (next.type === 'nl') { - this.next() - continue - } - return indent + console.log(indent); + const recur = (indent) => { + this.next() + const next = this.peek() + if (!next) return indent + if (next.type === 'nl') { + this.next() + return indent + } else if (next.type === 'space') { + console.log(indent); + + return recur(indent + 1); + } + return indent + } + return recur(1); } return 0 } @@ -385,9 +334,7 @@ function StreamLexer() { StreamLexer.prototype.next = function() { const { value } = this.generator.next(); - if (value){ - return value; - } + return value; } StreamLexer.prototype.save = function() { @@ -395,7 +342,7 @@ StreamLexer.prototype.save = function() { StreamLexer.prototype.getTokenTypes = function(source) { const types = []; - const iter = indented(this.lexer, source); + const iter = indented( moo.compile(rules), source); const arr = []; for (const t of iter){ arr.push(t); @@ -406,16 +353,14 @@ StreamLexer.prototype.getTokenTypes = function(source) { case "space": return " "; case "indent": return "->"; case "dedent": return "<-"; - case "eof": return "eof"; default: return t.text; } }) } StreamLexer.prototype.reset = function(source, info) { - console.log('types', this.getTokenTypes(source)) + //console.log('tokens', this.getTokenTypes(source)) this.generator = indented(this.lexer, source, info); - this.initialized = true; } StreamLexer.prototype.formatError = function(token) { @@ -425,21 +370,78 @@ StreamLexer.prototype.formatError = function(token) { StreamLexer.prototype.has = function(name) { if (name == 'indent') return true; if (name == 'dedent') return true; - if (name == 'eof') return true; return this.lexer.has(name); } -let mapId = 0; - const rules = { + space: /[ ]/, nl: {match: /[\n\r]+/, lineBreaks: true }, - space: /[ ]+/, - operator: /[+\-*?|\/]/, - wordChar: /[a-zA-Z$_]/, - digitChar: /[0-9]/, - anyChar: /./, + word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, + number: /[0-9]+/, + operator: /[\+]/, + anyChar: /./ + //anyChar: /[a-zA-Z0-9\.\+\-*\?\|\/ \():]/, 
}; const lexer = new StreamLexer(); +// Errors + +function missingComma(){ + throw new Error("missing comma"); +} + +function expectedScopeOperator(){ + throw new Error("nested scope without scope operator"); +} + +function missingRhs(){ + throw new Error("rhs of pair assignment missing"); +} + +function unknownOrEmpty(){ + throw new Error("unknown or empty"); +} + +// Value Reducers + +function addPairToMap([key, value], map){ + if (map.get(key)){ + throw new Error(`duplicate key ${key}`); + } + map.set(key, value); +} + +function join(list, rhs){ + if (!list) return rhs; + if (typeof list == 'string'){ + return list + rhs; + } + return list + rhs; +} + +function reduceN(...list){ + if (list.length == 1){ + return list[0]; + } + let memo; + for (const item of list){ + memo = join(memo, item); + } + return memo; +} + +function reduce(list){ + return reduceN(...list); +} + +function map2Object(map){ + const object = {}; + for (const pair of map){ + const [key] = pair; + object[key] = map.get(key); + } + return object; +} + %} \ No newline at end of file From 730a1a76360e11b54891c9e2f7c8be26647e541a Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Sun, 17 Mar 2019 22:18:11 -0700 Subject: [PATCH 06/12] url parser --- src/parsers/moss.ne | 404 +++++++++++++++++++++++++++++++------------- 1 file changed, 290 insertions(+), 114 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index 5316dff..f084cdd 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,11 +1,15 @@ @lexer lexer + +root + -> scope %eof {% id %} scope - -> map nl:* {% id %} - + -> map {% id %} + map -> map mapPairConstructor {% ([_map, nextMatch]) => { + //console.log({nextMatch}); const map = new Map(_map); if (nextMatch) { addPairToMap(nextMatch, map); @@ -15,6 +19,7 @@ map | mapPairConstructor {% ([initialMatch]) => { const map = new Map(); + //console.log({initialMatch}); if (initialMatch) { addPairToMap(initialMatch, map); } @@ -23,44 +28,41 @@ map mapPairConstructor # nested constrained scope - -> label scopeOperator __ ((constraintMap (__ | pushScope)) | pushScope) scope popScope - {% ([key, _1, _2, scopeConstaints, scope]) => { - console.log('scopeConstaints', scopeConstaints[0]) + -> key space ((constraintMap (space | pushScope)) | pushScope) scope popScope + {% ([key, space, scopeConstaints, scope]) => { return [key, scope] } %} - # nested scope - | label scopeOperator pushScope scope popScope - {% ([key, _1, _2, scope]) => { + | key pushScope scope popScope + {% ([key, push, scope]) => { return [key, scope] } %} # map pair, optionally constrained - | label scopeOperator __ (constraintMap __):? expression endGroup - {% ([key, _1, _2, constraintMap, value]) => { - console.log('pair', [key, value]); - return [key, value] + | key space (constraintMap space):? 
expression expressionTerminator + {% ([key, space, constraintMap, expression]) => { + return [key, expression] } %} # default simple value - | expression endGroup - {% ([value]) => { - return [value, true] + | expression expressionTerminator + {% ([expression]) => { + return [expression, true] }%} - - | commentLine {% ([comment]) => { - return ['comment', comment] - }%} - + + | expressionTerminator {% id %} + | commentedLine {% id %} # error cases + | key space ((constraintMap (space | pushScope)) | pushScope) expressionTerminator + {% expectedRhs %} | label pushScope scope {% expectedScopeOperator %} constraintMap - -> constraintMap __ constraint + -> constraintMap space constraint {% ([map, ws, nextMatch]) => { if (nextMatch) { addPairToMap(nextMatch, map); @@ -78,26 +80,24 @@ constraintMap } %} constraint - -> "@" label "[" scope "]" - {% ([_0, property, _2, scopeSelector]) => { - return [property, scopeSelector[2]] - }%} - | "@" "[" (scope | inlineScope) "]" + -> "@" "[" (scope | (nestedScope "]")) {% ([_0, _1, scopeSelector]) => { return scopeSelector[0][2] }%} - | "@[" _ "]" + | "@" label "[" list "]" {% ([_0, property, _2, scopeSelector]) => { - return [property, true] + return [property, scopeSelector[2]] }%} | "@" label {% ([_0, property]) => [property, true] %} - -inlineScope - -> pushScope scope popScope + + # error cases + | "@" "[" space:+ nl {% extraSpace %} + | "@" "[" (_ | nl) "]" {% emptyScope %} + | "@" label "[" _ "]" {% emptyScope %} list - -> list "," _ label + -> list listSeparator label {% ([list, _1, _2, item]) => { if (item) { list.push(item); @@ -128,58 +128,181 @@ multilineString }).join('\\n'); return lines; } %} - -# HighLevel - + # Maths expression -> add {% id %} add - -> add _ ("+"|"-") _ multiply {% (d) => {console.log(d); return d.join('')} %} + -> add space ("+"|"-") space multiply {% reduce %} | multiply {% id %} multiply - -> multiply _ ("*"|"/") _ term {% (d) => d.join('') %} + -> multiply space ("*"|"/") space term {% reduce %} | term {% id %} group -> "(" expression ")" {% (d) => d.join('') %} - + +key + -> label ":" {% lhs %} + term - -> group {% id %} - | label {% id %} + -> label {% id %} + | group {% id %} + | uri {% id %} +# | chunk {% id %} + +# Operators +directive + -> "@" {% () => '@' %} + +# Formatting +nestedScope + -> pushScope scope popScope + +pushScope + -> (inlineComment | nl) indent {% () => null %} +expressionTerminator + -> listSeparator {% id %} + | endLine {% id %} + | "]" {% id %} + +listSeparator + -> "," space {% reduce %} + +endLine + -> inlineComment {% id %} + | nl {% id %} + +commentedLine + -> space:* comment {% id %} + +inlineComment + -> space comment {% id %} + +comment + -> "#" _escapedString:? nl {% ([operator, comment]) => (comment) %} + +popScope + -> dedent {% () => null %} + # Numbers number -> _number {% ([n]) => parseFloat(n) %} _number - -> _float "e" _int {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + -> _float "e" _int {% reduce %} | _float {% id %} _float - -> _int "." %number {% ([lhs, operator, rhs]) => lhs + operator + rhs %} + -> _int "." 
digit {% reduce %} | _int {% id %} _int - -> "-" %number {% ([lhs, rhs]) => lhs + rhs %} - | %number {% ([n]) => n %} - + -> "-" digit {% concat %} + | digit {% ([n]) => n %} + +digit -> [0-9] {% ([tok]) => tok.value %} + # Words -label -> - word {% id %} - | escapedString {% id %} + +label + -> escapedString {% id %} | dqString {% id %} + | word {% id %} | number {% id %} -#Strings +# URL = scheme:[//authority]path[?query][#fragment] +uri + -> url {% id %} + | urx {% id %} + +url + -> urlScheme urx {% pickBest %} + +urlScheme + -> urlSafe ":" "/" "/" {% reduce %} + +urx + -> urlCredentials "@" urd {% reduce %} + | urd {% reduce %} + +urd + -> tld urlPath uriQuery {% reduce %} + | tld urlPath {% reduce %} + | tld {% reduce %} + +urlCredentials + -> emailCredentials {% id %} + | userCredentials {% id %} + +urlPath + -> "/" relativePath {% concat %} + +relativePath -> + relativePath "/" fileName {% reduce %} + | fileName {% id %} + +fileName -> + fileName "." word {% reduce %} + | word {% id %} + +pathElement + -> pathElement "/" urlSafe {% reduce %} + | urlSafe {% id %} + +emailCredentials + -> emailCredentials ":" password {% reduce %} + | email {% reduce %} + +userCredentials + -> userCredentials ":" password {% reduce %} + | urlSafe {% reduce %} + +password + -> urlSafePlusEncoded {% reduce %} + +email + -> emailLhs "@" tld {% reduce %} + +emailLhs + -> urlSafe "." emailLhs {% id %} + | urlSafe {% id %} + +tld + -> domain "." urlSafe {% reduce %} -word -> %word {% ([n]) => n.value || "" %} +domain -> + urlSafe "." domain {% reduce %} + | urlSafe {% id %} -dqString +uriQuery + -> "?" queryList {% id %} + +queryList + -> queryList "&" queryFragment {% reduce %} + | queryFragment {% id %} + +queryFragment + -> queryFragment "=" urlSafe {% reduce %} + | urlSafe {% id %} + +word + -> word wordSafeChar {% concat %} + | wordStartChar {% id %} + +wordSafeChar + -> wordStartChar {% id %} + | [0-9] {% ([tok]) => tok.value %} + +wordStartChar + -> [a-zA-Z$_] {% ([tok]) => tok.value %} + +dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} escapedString @@ -188,57 +311,64 @@ escapedString _string -> null {% function() {return ""; } %} | _string _stringchar {% ([lhs, rhs]) => lhs + rhs %} + _stringchar -> [^\\"] {% id %} - | "\\" [^] {% ([lhs, rhs]) => lhs + rhs %} + | "\\" [^] {% concat %} -_escapedString - -> _escapedString (%word | %number | %space) {% ([lhs, rhs]) => lhs + rhs[0] %} +urlSafePlusEncoded + -> urlSafePlusEncoded urlSafePlusEncodedChars {% reduce %} + | urlSafePlusEncodedChars {% id %} -directive - -> "@" {% () => '@' %} +urlSafePlusEncodedChars + -> "%" hexDigit hexDigit {% reduce %} + | "&" "a" "m" "p" ";" {% reduce %} + | urlSafeChar {% id %} + +hexDigit -> [0-9a-fA-F] {% id %} -scopeOperator - -> ":" {% () => ':' %} +urlSafe + -> urlSafe urlSafeChar {% concat %} + | urlSafeChar {% id %} -# Formatting -pushScope - -> nl indent _ {% () => null %} +# [0-9a-zA-Z$\-_.+!*'()] but we skip the dot as it is meaningfully parsed in rules +urlSafeChar -> [0-9a-zA-Z$\-_+!*'()] {% ([tok]) => tok.value %} -popScope - -> dedent {% () => null %} - +chunk + -> chunk chunkChar {% concat %} + | chunkChar {% id %} + +chunkChar + -> [a-zA-Z0-9\+\-*\?\|\/\()\\:] {% ([tok]) => tok.value %} + +_escapedString + -> _escapedString escapedChar {% concat %} + | escapedChar {% id %} +escapedChar + -> %space {% ([tok]) => tok.value %} + | %any {% ([tok]) => tok.value %} + +# syntactic whitespace +nl -> %nl {% ([tok]) => null %} indent -> %indent {% () => null %} dedent -> %dedent {% () => null %} - 
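# Editor's sketch (illustrative, inferred from the rules above; not part of the recorded diff):
# the uri / url productions introduced in this patch appear intended to accept bare
# host/path references as well as full URLs, for example forms like
#   example.com/path
#   https://user:secret@example.com/a/b.txt?x=1&y=2
# The "." is deliberately excluded from urlSafeChar so that domain, tld and fileName
# can each split on it.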
-endGroup - -> _ (("," _) | endLine) {% () => null %} - -endLine - -> comment:? nl {% () => null %} - -commentLine - -> comment nl {% ([comment]) => (comment) %} - -comment - -> "#" _escapedString {% ([_, comment]) => (comment) %} +space -> %space {% ([tok]) => tok.value %} -nl -> %nl {% id %} -_ +# ignored whitespace or chars +_ -> _ space {% ([e]) => { return e ? e + ' ': ''; } %} | null -__ -> space {% id %} - -space -> %space {% ([d]) => d.value %} - @{% // Lexer +const nl = () => ({type: 'nl', offset: 2, toString: () => ''}); +const eof = () => ({type: 'eof'}); + function* indented(lexer, source, info) { let iter = peekable(lexer.reset(source, info)) let stack = [] @@ -246,29 +376,32 @@ function* indented(lexer, source, info) { // absorb initial blank lines and indentation let indent = iter.nextIndent() + for (let tok; tok = iter.next(); ) { if (tok.type === 'nl') { const newIndent = iter.nextIndent() - if (newIndent == null) break // eof + if (newIndent == null) { + break; + }// eof else if (newIndent === indent) { - yield {...tok, type: 'nl'} + yield nl(); } else if (newIndent > indent) { stack.push(indent) indent = newIndent - yield {...tok, type: 'nl'} + yield nl(); yield {...tok, type: 'indent', indent: indent} } else if (newIndent < indent){ while (newIndent < indent) { indent = stack.pop() - yield {...tok, type: 'nl'} + yield nl(); yield {...tok, type: 'dedent', indent: indent} } if (newIndent !== indent) { throw new Error('inconsistent indentation') } } else { - yield {...tok, type: 'nl'} + yield nl(); } indent = newIndent } else { @@ -279,11 +412,12 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { indent = stack.pop(); - yield {type: 'nl', value: '' } - yield {type: 'dedent', value: '' } + yield nl(); + yield {type: 'dedent', indent: indent } } - yield {type: 'nl', value: '' } + yield nl(); + yield eof(); } function peekable(lexer) { @@ -304,7 +438,6 @@ function peekable(lexer) { } else if (tok.type === 'space') { const indent = tok.value.length - console.log(indent); const recur = (indent) => { this.next() const next = this.peek() @@ -313,8 +446,6 @@ function peekable(lexer) { this.next() return indent } else if (next.type === 'space') { - console.log(indent); - return recur(indent + 1); } return indent @@ -327,14 +458,27 @@ function peekable(lexer) { } } +const printToken = (t) => { + switch (t.type){ + case "nl": return "\n"; + case "space": return " "; + case "indent": return "->"; + case "dedent": return "<-"; + case "eof": return ""; + default: return t.text; + } +} function StreamLexer() { this.lexer = moo.compile(rules); } StreamLexer.prototype.next = function() { - const { value } = this.generator.next(); - return value; + const tok = this.generator.next().value; + if (tok){ + //console.log(printToken(tok), tok); + return tok; + } } StreamLexer.prototype.save = function() { @@ -345,21 +489,23 @@ StreamLexer.prototype.getTokenTypes = function(source) { const iter = indented( moo.compile(rules), source); const arr = []; for (const t of iter){ - arr.push(t); - } - return arr.map(t => { - switch (t.type){ - case "nl": return "\n"; - case "space": return " "; - case "indent": return "->"; - case "dedent": return "<-"; - default: return t.text; + if (t.type == 'any'){ + const back = arr[arr.length - 1]; + if (back && back.type == 'any'){ + back.value += t.value; + back.text += t.text; + } else { + arr.push(t); + } + } else { + arr.push(t); } - }) + } + return arr.map(t => printToken(t)) } 
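// Editor's usage sketch (illustrative, not part of the recorded diff): assuming moo is in
// scope, the helpers defined in this postamble (rules, indented, printToken) can be used
// to inspect the indentation-aware token stream the grammar consumes. The function name
// below is hypothetical and shown only for illustration.
function debugTokenStream(source) {
  const out = [];
  for (const tok of indented(moo.compile(rules), source)) {
    // leading spaces after a newline are folded into explicit indent/dedent tokens
    out.push(printToken(tok));
  }
  return out.join('');
}
// e.g. debugTokenStream('a:\n  b: c\n') is expected to contain "->" where the nested
// scope is entered and "<-" where it is closed.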
StreamLexer.prototype.reset = function(source, info) { - //console.log('tokens', this.getTokenTypes(source)) + console.log('tokens', this.getTokenTypes(source)) this.generator = indented(this.lexer, source, info); } @@ -376,16 +522,37 @@ StreamLexer.prototype.has = function(name) { const rules = { space: /[ ]/, nl: {match: /[\n\r]+/, lineBreaks: true }, - word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, - number: /[0-9]+/, - operator: /[\+]/, - anyChar: /./ - //anyChar: /[a-zA-Z0-9\.\+\-*\?\|\/ \():]/, + //word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, + //number: /[0-9]/, + //urlUnsafe: /["<>#%{}|\\^~[]`]/, + //urlReserved: /[;/?:@=&]/, + //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, + // chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, + any: /[^\s]/ }; const lexer = new StreamLexer(); // Errors +function emptyScope(){ + throw new Error("empty scope"); +} + +function expectedRhs(){ + throw new Error("no value for rhs"); +} + +function expectedTerminator(){ + throw new Error("missing map pair terminator"); +} + +function extraSpace(){ + throw new Error("unused space at end of line"); +} + +function genericContextError(){ + throw new Error("@context error"); +} function missingComma(){ throw new Error("missing comma"); @@ -405,6 +572,13 @@ function unknownOrEmpty(){ // Value Reducers +const joinExpressionOperator = ([lhs, s1, op, s2, rhs]) => lhs + s1 + op + s2 + rhs +const joinSeparatedChunks = ([lhs, op, rhs]) => lhs + op + rhs +const concat = ([lhs, rhs]) => lhs + rhs +const lhs = ([lhs, rhs]) => lhs +const rhs = ([lhs, rhs]) => rhs +const back = (d) => d[d.length - 1] + function addPairToMap([key, value], map){ if (map.get(key)){ throw new Error(`duplicate key ${key}`); @@ -414,13 +588,14 @@ function addPairToMap([key, value], map){ function join(list, rhs){ if (!list) return rhs; + if (!rhs) return list; if (typeof list == 'string'){ return list + rhs; } return list + rhs; } -function reduceN(...list){ +function reduce(list){ if (list.length == 1){ return list[0]; } @@ -428,11 +603,12 @@ function reduceN(...list){ for (const item of list){ memo = join(memo, item); } + console.log(memo); return memo; } -function reduce(list){ - return reduceN(...list); +function pickBest(list){ + return reduce(list); } function map2Object(map){ From 692e1b7b2f0093bf75eb4cb6e5dd3c108b87d487 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Mon, 18 Mar 2019 05:22:39 -0700 Subject: [PATCH 07/12] stable-ish on list / map notation --- src/parsers/moss.ne | 279 +++++++++++++++++++++++--------------------- 1 file changed, 147 insertions(+), 132 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index f084cdd..eb7987c 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,17 +1,17 @@ @lexer lexer - -root - -> scope %eof {% id %} + +start + -> sof scope eof {% ([sof, scope]) => scope %} scope -> map {% id %} -map +map -> map mapPairConstructor {% ([_map, nextMatch]) => { //console.log({nextMatch}); const map = new Map(_map); - if (nextMatch) { + if (nextMatch && (nextMatch[0] !== undefined)) { addPairToMap(nextMatch, map); } return map; @@ -19,49 +19,41 @@ map | mapPairConstructor {% ([initialMatch]) => { const map = new Map(); - //console.log({initialMatch}); - if (initialMatch) { + console.log({initialMatch}); + if (initialMatch && (initialMatch[0] !== undefined)) { addPairToMap(initialMatch, map); } return map; } %} -mapPairConstructor +mapPairConstructor # nested constrained scope - -> key space ((constraintMap (space | pushScope)) | pushScope) scope popScope - {% ([key, space, scopeConstaints, scope]) => { + -> key 
((space constraintMap pushScope) | pushScope) scope popScope + {% ([key, scopeConstaints, scope]) => { return [key, scope] } %} - # nested scope - | key pushScope scope popScope - {% ([key, push, scope]) => { - return [key, scope] - } %} - # map pair, optionally constrained - | key space (constraintMap space):? expression expressionTerminator - {% ([key, space, constraintMap, expression]) => { - return [key, expression] + | key (space constraintMap):? list + {% ([key, constraintMap, list]) => { + return [key, list] } %} # default simple value - | expression expressionTerminator - {% ([expression]) => { - return [expression, true] + | list + {% ([list]) => { + return [list, true] }%} - | expressionTerminator {% id %} - | commentedLine {% id %} + | sol eol {% () => null %} + | sol comment {% () => null %} # error cases - | key space ((constraintMap (space | pushScope)) | pushScope) expressionTerminator - {% expectedRhs %} | label pushScope scope {% expectedScopeOperator %} -constraintMap +constraintMap -> constraintMap space constraint {% ([map, ws, nextMatch]) => { if (nextMatch) { @@ -80,11 +72,9 @@ constraintMap } %} constraint - -> "@" "[" (scope | (nestedScope "]")) - {% ([_0, _1, scopeSelector]) => { - return scopeSelector[0][2] - }%} - | "@" label "[" list "]" + -> "@" bracketedScope + {% ([directive, scope]) => scope %} + | "@" label "[" space listLoop space "]" {% ([_0, property, _2, scopeSelector]) => { return [property, scopeSelector[2]] }%} @@ -92,67 +82,57 @@ constraint {% ([_0, property]) => [property, true] %} # error cases - | "@" "[" space:+ nl {% extraSpace %} - | "@" "[" (_ | nl) "]" {% emptyScope %} + | "@" "[" space:+ eol {% extraSpace %} + | "@" "[" (_ | eol) "]" {% emptyScope %} | "@" label "[" _ "]" {% emptyScope %} -list - -> list listSeparator label - {% ([list, _1, _2, item]) => { - if (item) { - list.push(item); - } - return list; - } %} - | label - {% ([value]) => { - return [value]; - } %} - -# MultiLine String +bracketedScope + -> "[" nestedScope sol "]" {% ([bracket, scope]) => scope %} + | "[" scope {% rhs %} -multilineString - -> ((nl | dedent) _string):* dedent - {% function(d) { - const indent = d[0][0][0][0]; +# Map +key + -> (sol | space) listLoop ":" {% ([pre, label, scopeOperator]) => label %} - const lines = d[2].map(segment => { - const relativeIndent = segment[0] - indent; - let base = ''; - if (relativeIndent > 0){ - for (let i = 0; i < relativeIndent; i++){ - base = base + ' '; - } - } - return base + segment[1]; - }).join('\\n'); - return lines; - } %} +# List +list + -> (sol | space) listLoop ("," | endLine | (" " "]")) + {% ([pre, list]) => list %} -# Maths +listLoop + -> listValue (space listValue):* {% + ([head, tail]) => { + if (tail && tail.length){ + return head + reduce(tail.map(reduce)); + } + return head; + }%} + +listValue + -> label {% id %} + | uri {% id %} -expression +#Math + +expression -> add {% id %} add - -> add space ("+"|"-") space multiply {% reduce %} + -> add ("+"|"-") multiply {% reduce %} | multiply {% id %} multiply - -> multiply space ("*"|"/") space term {% reduce %} + -> multiply ("*"|"/") term {% reduce %} | term {% id %} -group - -> "(" expression ")" {% (d) => d.join('') %} +term + -> group {% id %} + | label {% id %} + +group + -> "(" expression ")" {% reduce %} + | label {% id %} -key - -> label ":" {% lhs %} - -term - -> label {% id %} - | group {% id %} - | uri {% id %} -# | chunk {% id %} # Operators directive @@ -160,34 +140,23 @@ directive # Formatting nestedScope - -> pushScope scope popScope + -> 
pushScope scope popScope {% ([push, scope]) => scope %} pushScope - -> (inlineComment | nl) indent {% () => null %} - -expressionTerminator - -> listSeparator {% id %} - | endLine {% id %} - | "]" {% id %} - -listSeparator - -> "," space {% reduce %} + -> (inlineComment | eol) indent {% () => null %} +popScope + -> dedent {% () => null %} + endLine -> inlineComment {% id %} - | nl {% id %} + | eol {% id %} -commentedLine - -> space:* comment {% id %} - inlineComment -> space comment {% id %} comment - -> "#" _escapedString:? nl {% ([operator, comment]) => (comment) %} - -popScope - -> dedent {% () => null %} + -> "#" _escapedString:? eol {% ([operator, comment]) => (comment) %} # Numbers @@ -206,14 +175,16 @@ _int -> "-" digit {% concat %} | digit {% ([n]) => n %} -digit -> [0-9] {% ([tok]) => tok.value %} +digit + -> digit [0-9] {% concat %} + | [0-9] {% ([tok]) => tok %} # Words label -> escapedString {% id %} | dqString {% id %} - | word {% id %} + | singleWord {% id %} | number {% id %} # URL = scheme:[//authority]path[?query][#fragment] @@ -222,7 +193,7 @@ uri | urx {% id %} url - -> urlScheme urx {% pickBest %} + -> urlScheme urx {% reduce %} urlScheme -> urlSafe ":" "/" "/" {% reduce %} @@ -232,9 +203,7 @@ urx | urd {% reduce %} urd - -> tld urlPath uriQuery {% reduce %} - | tld urlPath {% reduce %} - | tld {% reduce %} + -> tld urlPath:? uriQuery:? {% reduce %} urlCredentials -> emailCredentials {% id %} @@ -281,15 +250,19 @@ domain -> | urlSafe {% id %} uriQuery - -> "?" queryList {% id %} + -> "?" queryList {% reduce %} queryList -> queryList "&" queryFragment {% reduce %} | queryFragment {% id %} queryFragment - -> queryFragment "=" urlSafe {% reduce %} - | urlSafe {% id %} + -> queryFragment "=" urlSafePlusEncoded {% reduce %} + | urlSafePlusEncoded {% id %} + +singleWord + -> [a-zA-Z$_] [a-zA-Z$_0-9]:* + {% optionalTail %} word -> word wordSafeChar {% concat %} @@ -302,6 +275,26 @@ wordSafeChar wordStartChar -> [a-zA-Z$_] {% ([tok]) => tok.value %} +# MultiLine String + +multilineString + -> ((eol | dedent) _string):* dedent + {% function(d) { + const indent = d[0][0][0][0]; + + const lines = d[2].map(segment => { + const relativeIndent = segment[0] - indent; + let base = ''; + if (relativeIndent > 0){ + for (let i = 0; i < relativeIndent; i++){ + base = base + ' '; + } + } + return base + segment[1]; + }).join('\\n'); + return lines; + } %} + dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} @@ -338,8 +331,8 @@ chunk -> chunk chunkChar {% concat %} | chunkChar {% id %} -chunkChar - -> [a-zA-Z0-9\+\-*\?\|\/\()\\:] {% ([tok]) => tok.value %} +chunkChar + -> [a-zA-Z0-9@+\-*?|/()\\:] {% ([tok]) => tok.value %} _escapedString -> _escapedString escapedChar {% concat %} @@ -349,11 +342,14 @@ escapedChar | %any {% ([tok]) => tok.value %} # syntactic whitespace -nl -> %nl {% ([tok]) => null %} +sof -> %sof {% ([tok]) => tok.value %} +eof -> %eof {% ([tok]) => tok.value %} +sol -> %sol {% ([tok]) => tok.value %} +eol -> %eol {% ([tok]) => tok.value %} indent - -> %indent {% () => null %} + -> %indent {% ([tok]) => tok.value %} dedent - -> %dedent {% () => null %} + -> %dedent {% ([tok]) => tok.value %} space -> %space {% ([tok]) => tok.value %} # ignored whitespace or chars @@ -366,8 +362,11 @@ _ @{% // Lexer -const nl = () => ({type: 'nl', offset: 2, toString: () => ''}); -const eof = () => ({type: 'eof'}); +const makeToken = (type, text) => ({type, text, value: text, toString: () => text}); +const makeEol = () => makeToken('eol', '\n'); +const makeEof = () => 
makeToken('eof', 'eof'); +const makeSol = () => makeToken('sol', '\n'); +const makeSof = () => makeToken('sof', 'sof'); function* indented(lexer, source, info) { let iter = peekable(lexer.reset(source, info)) @@ -376,32 +375,38 @@ function* indented(lexer, source, info) { // absorb initial blank lines and indentation let indent = iter.nextIndent() - + yield makeSof(); + yield makeSol(); + for (let tok; tok = iter.next(); ) { - if (tok.type === 'nl') { + if (tok.type === 'eol') { const newIndent = iter.nextIndent() if (newIndent == null) { break; }// eof else if (newIndent === indent) { - yield nl(); + yield makeEol(); + yield makeSol(); } else if (newIndent > indent) { stack.push(indent) indent = newIndent - yield nl(); - yield {...tok, type: 'indent', indent: indent} + yield makeEol(); + yield {...makeToken('indent'), indent: indent} + yield makeSol(); } else if (newIndent < indent){ while (newIndent < indent) { indent = stack.pop() - yield nl(); - yield {...tok, type: 'dedent', indent: indent} + yield makeEol(); + yield {...makeToken('dedent'), indent: indent} + yield makeSol(); } if (newIndent !== indent) { throw new Error('inconsistent indentation') } } else { - yield nl(); + yield makeEol(); + yield makeSol(); } indent = newIndent } else { @@ -412,12 +417,13 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { indent = stack.pop(); - yield nl(); + yield makeEol(); yield {type: 'dedent', indent: indent } + yield makeSol(); } - yield nl(); - yield eof(); + yield makeEol(); + yield makeEof(); } function peekable(lexer) { @@ -433,7 +439,7 @@ function peekable(lexer) { }, nextIndent() { for (let tok; tok = this.peek(); ) { - if (tok.type === 'nl') { + if (tok.type === 'eol') { this.next(); } else if (tok.type === 'space') { @@ -442,7 +448,7 @@ function peekable(lexer) { this.next() const next = this.peek() if (!next) return indent - if (next.type === 'nl') { + if (next.type === 'eol') { this.next() return indent } else if (next.type === 'space') { @@ -460,11 +466,13 @@ function peekable(lexer) { const printToken = (t) => { switch (t.type){ - case "nl": return "\n"; + case "eol": return "}"; case "space": return " "; case "indent": return "->"; case "dedent": return "<-"; case "eof": return ""; + case "sof": return "<>"; + case "sol": return "{"; default: return t.text; } } @@ -490,7 +498,7 @@ StreamLexer.prototype.getTokenTypes = function(source) { const arr = []; for (const t of iter){ if (t.type == 'any'){ - const back = arr[arr.length - 1]; + const back = arr.length ? 
arr[arr.length - 1] : null; if (back && back.type == 'any'){ back.value += t.value; back.text += t.text; @@ -516,18 +524,22 @@ StreamLexer.prototype.formatError = function(token) { StreamLexer.prototype.has = function(name) { if (name == 'indent') return true; if (name == 'dedent') return true; + if (name == 'sof') return true; + if (name == 'sol') return true; + if (name == 'eof') return true; + if (name == 'eol') return true; return this.lexer.has(name); } const rules = { space: /[ ]/, - nl: {match: /[\n\r]+/, lineBreaks: true }, + eol: {match: /[\n\r]+/, lineBreaks: true }, //word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, //number: /[0-9]/, //urlUnsafe: /["<>#%{}|\\^~[]`]/, //urlReserved: /[;/?:@=&]/, //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, - // chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, + //chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, any: /[^\s]/ }; @@ -603,12 +615,15 @@ function reduce(list){ for (const item of list){ memo = join(memo, item); } - console.log(memo); return memo; } -function pickBest(list){ - return reduce(list); +function optionalTail(list){ + const [head, tail] = list; + if (tail && tail.length){ + return head.value + reduce(tail); + } + return head.value; f } function map2Object(map){ From e156a0c794f12199b54ed356d4cddb393d2d5d92 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Thu, 21 Mar 2019 17:05:29 -0700 Subject: [PATCH 08/12] moss.ne --- src/parsers/moss.ne | 421 +++++++++++++++++++++++++++++--------------- 1 file changed, 276 insertions(+), 145 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index eb7987c..73cfd4f 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -1,60 +1,173 @@ @lexer lexer start - -> sof scope eof {% ([sof, scope]) => scope %} + -> sof rootScope eof {% ([sof, scope]) => scope %} -scope +rootScope -> map {% id %} + | (sol eol "string") multilineString ("\/string") {% ([sol, scope]) => scope %} + | (sol eol "list") list ("\/list") {% ([sol, scope]) => scope %} +scope + -> map {% id %} + map -> map mapPairConstructor - {% ([_map, nextMatch]) => { - //console.log({nextMatch}); - const map = new Map(_map); + {% ([_layer, nextMatch]) => { + const layer = new Map(_layer) if (nextMatch && (nextMatch[0] !== undefined)) { - addPairToMap(nextMatch, map); + addPairToMap(nextMatch, layer) } - return map; + return layer; } %} | mapPairConstructor {% ([initialMatch]) => { - const map = new Map(); - console.log({initialMatch}); + const layer = new Map(); if (initialMatch && (initialMatch[0] !== undefined)) { - addPairToMap(initialMatch, map); + addPairToMap(initialMatch, layer) } - return map; + return layer; } %} mapPairConstructor - # nested constrained scope - -> key ((space constraintMap pushScope) | pushScope) scope popScope - {% ([key, scopeConstaints, scope]) => { - return [key, scope] + # nested explicitly declared list + -> key ((space constraintMap) | space) (eol "list" indent) list popScope "\/list" + {% ([key, context, mode, scope]) => { + if (context){ + return [key, scope, {multiLineString: true, ...context[1]}] + } else { + return [key, scope, {multiLineString: true}] + } } %} - # map pair, optionally constrained - | key (space constraintMap):? 
list - {% ([key, constraintMap, list]) => { - return [key, list] + # nested explicitly declared multiline string + | key ((space constraintMap) | space) (eol "string" indent) multilineString popScope "\/string" + {% ([key, context, mode, scope]) => { + if (context){ + return [key, scope, {multiLineString: true, ...context[1]}] + } else { + return [key, scope, {multiLineString: true}] } - %} + } %} + # nested constrained scope + | key pushTypedScope scope popScope + {% ([key, context, scope]) => { + return [key, scope] + } %} + + # explicit map pair, rhs is a nested map + | "@no" key ((space constraintMap) | space) "{" nestedScope sol "}" endLine + {% ([directive, bracket, scope]) => scope %} + + # explicit map pair, rhs is a map + | key ((space constraintMap) | space) "{" scope endLine + {% ([key, context, bracket, scope]) => { + return [key, scope] + } %} + + # default map pair, rhs is a statement + | key (space constraintMap):? space statement mapTerminator + {% ([key, context, space, statement]) => { + console.log('pair', [key, statement]) + return [key, statement] + } %} + # default simple value - | list - {% ([list]) => { - return [list, true] + | (sol | space) statement mapTerminator + {% ([prefix, statement]) => { + return [statement, true] }%} | sol eol {% () => null %} | sol comment {% () => null %} # error cases - | label pushScope scope + | literal pushScope scope {% expectedScopeOperator %} + +mapTerminator + -> ((space "}") | "," | endLine) {% id %} + +list + -> list mapPairConstructor + {% ([array, item]) => { + if (item){ + if (item[1] === true) array.push(item[0]); + else { + const map = new Map(); + map.set(item[0], item[1]); + array.push(map); + } + } + return array; + } %} + | mapPairConstructor + {% ([item]) => { + if (item[1] === true) return [item[0]]; + else { + const map = new Map(); + map.set(item[0], item[1]); + return map; + } + } %} + +listConstructor + -> ( sol ) statement endLine + {% ([key, statement]) => { + return statement + } %} + | ( sol ) (space _mls) pushTypedScope multilineString popScope + {% ([key, keyMode, scopeConstaints, indent, scope]) => { + return scope + } %} + + | ( sol ) pushTypedScope scope popScope + {% ([key, scopeConstaints, indent, scope]) => { + return scope + } %} + | sol eol {% () => null %} + + + +multilineString + -> stringLine stringLine:* {% ([head, tail]) => { + const [startIndent, mls] = head; + if (tail.length){ + const res = tail.map(([indent, line]) => { + let margin = ''; + if (indent > startIndent){ + for (let i = 0; i < indent - startIndent; i++){ + margin = margin + ' '; + } + } + if (line){ + return margin + line; + } + return margin; + }); + return [mls, ...res].join('\n'); + } + return mls; + } %} + +stringLine + -> indent multilineString dedent + {% ([indent, mls]) => { + return [indent.indent, mls]; + } %} + | sol _escapedString:? 
eol + {% ([sol, string]) => { + return [sol.indent, string]; + } %} +pushTypedScope -> + space constraintMap indent {% ([space, constraintMap]) => constraintMap %} + | pushScope {% id %} + + constraintMap - -> constraintMap space constraint + -> constraintMap constraint {% ([map, ws, nextMatch]) => { if (nextMatch) { addPairToMap(nextMatch, map); @@ -65,74 +178,60 @@ constraintMap {% ([initialMatch]) => { const map = new Map(); if (initialMatch) { - //console.log('add prop', initialMatch) addPairToMap(initialMatch, map); } return map; } %} constraint - -> "@" bracketedScope - {% ([directive, scope]) => scope %} - | "@" label "[" space listLoop space "]" - {% ([_0, property, _2, scopeSelector]) => { - return [property, scopeSelector[2]] + -> "@" "{" nestedScope sol "}" endLine + {% ([directive, bracket, scope]) => scope %} + | "@" literal "{" scope (space | endLine) + {% ([directive, literal, bracket, scope]) => [literal, scope] %} + | "@" literal (space | endLine) {% ([directive, property]) => { + return [property, true] }%} - | "@" label - {% ([_0, property]) => [property, true] %} - - # error cases - | "@" "[" space:+ eol {% extraSpace %} - | "@" "[" (_ | eol) "]" {% emptyScope %} - | "@" label "[" _ "]" {% emptyScope %} - -bracketedScope - -> "[" nestedScope sol "]" {% ([bracket, scope]) => scope %} - | "[" scope {% rhs %} # Map + key - -> (sol | space) listLoop ":" {% ([pre, label, scopeOperator]) => label %} + -> (sol | space) keyExpression ":" {% ([pre, key]) => key %} -# List -list - -> (sol | space) listLoop ("," | endLine | (" " "]")) - {% ([pre, list]) => list %} - -listLoop - -> listValue (space listValue):* {% - ([head, tail]) => { - if (tail && tail.length){ - return head + reduce(tail.map(reduce)); - } - return head; - }%} +keyExpression + -> ( "=" | "+" | "|" | "&" | "^" | "-" ) space statement {% reduce %} + | concat {% id %} -listValue - -> label {% id %} - | uri {% id %} +# statement +statement + -> concat {% id %} + +# Operators -#Math +concat + -> concat space boolean {% reduce %} + | boolean {% id %} -expression - -> add {% id %} - -add - -> add ("+"|"-") multiply {% reduce %} +boolean + -> boolean space ( "n" | "|" ) space add {% reduce %} + | add {% id %} + +add + -> add space ( "+"|"-" ) space multiply {% reduce %} | multiply {% id %} - -multiply - -> multiply ("*"|"/") term {% reduce %} - | term {% id %} + +multiply + -> multiply space ("*"|"/") space unaryPrefix {% reduce %} + | unaryPrefix {% id %} -term - -> group {% id %} - | label {% id %} - -group - -> "(" expression ")" {% reduce %} - | label {% id %} +unaryPrefix + -> "+" group {% reduce %} + | "-" group {% reduce %} + | "!" group {% reduce %} + | group {% id %} +group + -> "(" concat ")" {% reduce %} + | literal {% id %} # Operators directive @@ -164,27 +263,24 @@ number -> _number {% ([n]) => parseFloat(n) %} _number - -> _float "e" _int {% reduce %} + -> _float "e" digit {% reduce %} | _float {% id %} _float - -> _int "." digit {% reduce %} - | _int {% id %} + -> digit "." 
digit {% reduce %} + | digit {% id %} -_int - -> "-" digit {% concat %} - | digit {% ([n]) => n %} - digit -> digit [0-9] {% concat %} | [0-9] {% ([tok]) => tok %} # Words -label +literal -> escapedString {% id %} | dqString {% id %} | singleWord {% id %} + | uri {% id %} | number {% id %} # URL = scheme:[//authority]path[?query][#fragment] @@ -274,26 +370,6 @@ wordSafeChar wordStartChar -> [a-zA-Z$_] {% ([tok]) => tok.value %} - -# MultiLine String - -multilineString - -> ((eol | dedent) _string):* dedent - {% function(d) { - const indent = d[0][0][0][0]; - - const lines = d[2].map(segment => { - const relativeIndent = segment[0] - indent; - let base = ''; - if (relativeIndent > 0){ - for (let i = 0; i < relativeIndent; i++){ - base = base + ' '; - } - } - return base + segment[1]; - }).join('\\n'); - return lines; - } %} dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} @@ -337,19 +413,26 @@ chunkChar _escapedString -> _escapedString escapedChar {% concat %} | escapedChar {% id %} + escapedChar -> %space {% ([tok]) => tok.value %} | %any {% ([tok]) => tok.value %} +# notation modes +_aa -> "@" "m" "a" "p" {% reduce %} +_ordered -> "@" "l" "i" "s" "t" {% reduce %} +#_ordered -> "@" "list" {% id %} +_mls -> "@" "s" "t" "r" "i" "n" "g" {% reduce %} + # syntactic whitespace sof -> %sof {% ([tok]) => tok.value %} eof -> %eof {% ([tok]) => tok.value %} -sol -> %sol {% ([tok]) => tok.value %} -eol -> %eol {% ([tok]) => tok.value %} +sol -> %sol {% ([tok]) => tok %} +eol -> %eol {% ([tok]) => tok %} indent - -> %indent {% ([tok]) => tok.value %} + -> %indent {% ([tok]) => tok %} dedent - -> %dedent {% ([tok]) => tok.value %} + -> %dedent {% ([tok]) => tok %} space -> %space {% ([tok]) => tok.value %} # ignored whitespace or chars @@ -362,51 +445,79 @@ _ @{% // Lexer -const makeToken = (type, text) => ({type, text, value: text, toString: () => text}); -const makeEol = () => makeToken('eol', '\n'); +const makeToken = (type, text, indent) => ({type, text, value: text, indent, toString: () => text}); +const makeEol = (indent) => makeToken('eol', '\n'); const makeEof = () => makeToken('eof', 'eof'); -const makeSol = () => makeToken('sol', '\n'); +const makeSol = (indent) => makeToken('sol', '\n', indent) +const makeIndent = (indent) => makeToken('indent', 'indent', indent) +const makeDedent = (indent) => makeToken('dedent', 'dedent', indent) const makeSof = () => makeToken('sof', 'sof'); +const doDedent = (ruleMap, indent, nextIndent) => { + const tokens = [makeEol()]; + tokens.push(makeDedent(nextIndent)); + const ruleToken = ruleMap.get(indent); + if (ruleToken) { + tokens.push(makeToken('stopRule', `/${ruleToken.text}`)); + ruleMap.delete(indent) + } + tokens.push(makeSol(nextIndent)); + return tokens; +} + function* indented(lexer, source, info) { let iter = peekable(lexer.reset(source, info)) let stack = [] - + let ruleMap = new Map(); + // absorb initial blank lines and indentation let indent = iter.nextIndent() yield makeSof(); - yield makeSol(); + yield makeSol(indent); for (let tok; tok = iter.next(); ) { - if (tok.type === 'eol') { + if (tok.type === 'eol' || tok.type === 'startRule') { const newIndent = iter.nextIndent() if (newIndent == null) { break; }// eof else if (newIndent === indent) { yield makeEol(); - yield makeSol(); + if (tok.type === 'startRule'){ + if (indent === 0){ + const ruleToken = makeToken('startRule', tok.text.split('<')[0]); + ruleMap.set(indent, ruleToken); + yield ruleToken; + } + } + yield makeSol(indent); } else if (newIndent > indent) { - 
stack.push(indent) - indent = newIndent + stack.push(indent) yield makeEol(); - yield {...makeToken('indent'), indent: indent} - yield makeSol(); - + indent = newIndent + if (tok.type === 'startRule'){ + const ruleToken = makeToken('startRule', tok.text.split('<')[0]); + ruleMap.set(indent, ruleToken); + yield ruleToken; + } + yield makeIndent(indent) + yield makeSol(indent); } else if (newIndent < indent){ while (newIndent < indent) { - indent = stack.pop() - yield makeEol(); - yield {...makeToken('dedent'), indent: indent} - yield makeSol(); + const nextIndent = stack.pop(); + const dedentTokens = doDedent(ruleMap, indent, nextIndent); + for (const t of dedentTokens){ + yield t; + } + indent = nextIndent; } if (newIndent !== indent) { - throw new Error('inconsistent indentation') + throw new Error(`inconsistent indentation ${newIndent} != ${indent}`) } } else { yield makeEol(); - yield makeSol(); + yield makeSol(indent); } indent = newIndent } else { @@ -416,14 +527,22 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { - indent = stack.pop(); - yield makeEol(); - yield {type: 'dedent', indent: indent } - yield makeSol(); + const nextIndent = stack.pop() || 0; + const dedentTokens = doDedent(ruleMap, indent, nextIndent); + for (const t of dedentTokens){ + yield t; + } + indent = nextIndent; } - - yield makeEol(); - yield makeEof(); + + yield makeEol(); + const ruleToken = ruleMap.get(0); + if (ruleToken) { + yield makeToken('stopRule', `/${ruleToken.text}`); + ruleMap.delete(0) + } + + yield makeEof(); } function peekable(lexer) { @@ -464,8 +583,22 @@ function peekable(lexer) { } } +const rules = { + space: /[ ]/, + startRule: {match: /[a-zA-Z+\-]+<[\n\r]/, lineBreaks: true }, + eol: {match: /[\n\r]/, lineBreaks: true }, + //word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, + //number: /[0-9]/, + //urlUnsafe: /["<>#%{}|\\^~[]`]/, + //urlReserved: /[;/?:@=&]/, + //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, + //chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, + any: /[^\s]/ +}; + const printToken = (t) => { switch (t.type){ + case "eol": return "}"; case "eol": return "}"; case "space": return " "; case "indent": return "->"; @@ -531,21 +664,13 @@ StreamLexer.prototype.has = function(name) { return this.lexer.has(name); } -const rules = { - space: /[ ]/, - eol: {match: /[\n\r]+/, lineBreaks: true }, - //word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, - //number: /[0-9]/, - //urlUnsafe: /["<>#%{}|\\^~[]`]/, - //urlReserved: /[;/?:@=&]/, - //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, - //chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, - any: /[^\s]/ -}; - const lexer = new StreamLexer(); // Errors +function expectedListNotation(){ + throw new Error("expected list notation"); +} + function emptyScope(){ throw new Error("empty scope"); } @@ -592,12 +717,18 @@ const rhs = ([lhs, rhs]) => rhs const back = (d) => d[d.length - 1] function addPairToMap([key, value], map){ + console.log('add to layer', [key, value], map); if (map.get(key)){ throw new Error(`duplicate key ${key}`); } map.set(key, value); } +function addPairToDataAndContext([key, data, context], [dataMap, contextMap]){ + addPairToMap([key, data], dataMap); + addPairToMap([key, context], contextMap) +} + function join(list, rhs){ if (!list) return rhs; if (!rhs) return list; From 76d41a65d35e718ee8c4127b670ac881f4ed89dc Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Fri, 22 Mar 2019 15:35:34 -0700 Subject: [PATCH 09/12] wip --- src/parsers/moss.ne | 114 ++++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 
57 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index 73cfd4f..61860c2 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -7,7 +7,7 @@ rootScope -> map {% id %} | (sol eol "string") multilineString ("\/string") {% ([sol, scope]) => scope %} | (sol eol "list") list ("\/list") {% ([sol, scope]) => scope %} - + scope -> map {% id %} @@ -35,29 +35,29 @@ mapPairConstructor {% ([key, context, mode, scope]) => { if (context){ return [key, scope, {multiLineString: true, ...context[1]}] - } else { - return [key, scope, {multiLineString: true}] + } else { + return [key, scope, {multiLineString: true}] } } %} - + # nested explicitly declared multiline string | key ((space constraintMap) | space) (eol "string" indent) multilineString popScope "\/string" {% ([key, context, mode, scope]) => { if (context){ return [key, scope, {multiLineString: true, ...context[1]}] - } else { - return [key, scope, {multiLineString: true}] + } else { + return [key, scope, {multiLineString: true}] } } %} - + # nested constrained scope | key pushTypedScope scope popScope {% ([key, context, scope]) => { - return [key, scope] + return [key, scope] } %} - + # explicit map pair, rhs is a nested map - | "@no" key ((space constraintMap) | space) "{" nestedScope sol "}" endLine + | key ((space constraintMap) | space) "{" nestedScope sol "}" endLine {% ([directive, bracket, scope]) => scope %} # explicit map pair, rhs is a map @@ -65,20 +65,19 @@ mapPairConstructor {% ([key, context, bracket, scope]) => { return [key, scope] } %} - + # default map pair, rhs is a statement - | key (space constraintMap):? space statement mapTerminator - {% ([key, context, space, statement]) => { - console.log('pair', [key, statement]) + | key (space constraintMap):? statement mapTerminator + {% ([key, context, statement]) => { return [key, statement] } %} - + # default simple value | (sol | space) statement mapTerminator {% ([prefix, statement]) => { return [statement, true] }%} - + | sol eol {% () => null %} | sol comment {% () => null %} # error cases @@ -87,7 +86,7 @@ mapPairConstructor mapTerminator -> ((space "}") | "," | endLine) {% id %} - + list -> list mapPairConstructor {% ([array, item]) => { @@ -120,7 +119,7 @@ listConstructor {% ([key, keyMode, scopeConstaints, indent, scope]) => { return scope } %} - + | ( sol ) pushTypedScope scope popScope {% ([key, scopeConstaints, indent, scope]) => { return scope @@ -137,7 +136,7 @@ multilineString let margin = ''; if (indent > startIndent){ for (let i = 0; i < indent - startIndent; i++){ - margin = margin + ' '; + margin = margin + ' '; } } if (line){ @@ -149,7 +148,7 @@ multilineString } return mls; } %} - + stringLine -> indent multilineString dedent {% ([indent, mls]) => { @@ -159,16 +158,17 @@ stringLine {% ([sol, string]) => { return [sol.indent, string]; } %} - -pushTypedScope -> - space constraintMap indent {% ([space, constraintMap]) => constraintMap %} + +pushTypedScope -> + space constraintMap indent + {% ([space, constraintMap]) => constraintMap %} | pushScope {% id %} - + constraintMap -> constraintMap constraint - {% ([map, ws, nextMatch]) => { + {% ([map, nextMatch]) => { if (nextMatch) { addPairToMap(nextMatch, map); } @@ -182,7 +182,7 @@ constraintMap } return map; } %} - + constraint -> "@" "{" nestedScope sol "}" endLine {% ([directive, bracket, scope]) => scope %} @@ -204,7 +204,7 @@ keyExpression # statement statement -> concat {% id %} - + # Operators concat @@ -214,11 +214,11 @@ concat boolean -> boolean space ( "n" | "|" ) space add {% 
reduce %} | add {% id %} - + add -> add space ( "+"|"-" ) space multiply {% reduce %} | multiply {% id %} - + multiply -> multiply space ("*"|"/") space unaryPrefix {% reduce %} | unaryPrefix {% id %} @@ -246,36 +246,36 @@ pushScope popScope -> dedent {% () => null %} - + endLine -> inlineComment {% id %} | eol {% id %} - + inlineComment -> space comment {% id %} comment -> "#" _escapedString:? eol {% ([operator, comment]) => (comment) %} - + # Numbers -number +number -> _number {% ([n]) => parseFloat(n) %} _number -> _float "e" digit {% reduce %} | _float {% id %} - + _float -> digit "." digit {% reduce %} - | digit {% id %} - + | digit {% id %} + digit -> digit [0-9] {% concat %} | [0-9] {% ([tok]) => tok %} # Words - + literal -> escapedString {% id %} | dqString {% id %} @@ -293,14 +293,14 @@ url urlScheme -> urlSafe ":" "/" "/" {% reduce %} - + urx -> urlCredentials "@" urd {% reduce %} | urd {% reduce %} urd -> tld urlPath:? uriQuery:? {% reduce %} - + urlCredentials -> emailCredentials {% id %} | userCredentials {% id %} @@ -311,7 +311,7 @@ urlPath relativePath -> relativePath "/" fileName {% reduce %} | fileName {% id %} - + fileName -> fileName "." word {% reduce %} | word {% id %} @@ -323,7 +323,7 @@ pathElement emailCredentials -> emailCredentials ":" password {% reduce %} | email {% reduce %} - + userCredentials -> userCredentials ":" password {% reduce %} | urlSafe {% reduce %} @@ -351,7 +351,7 @@ uriQuery queryList -> queryList "&" queryFragment {% reduce %} | queryFragment {% id %} - + queryFragment -> queryFragment "=" urlSafePlusEncoded {% reduce %} | urlSafePlusEncoded {% id %} @@ -360,17 +360,17 @@ singleWord -> [a-zA-Z$_] [a-zA-Z$_0-9]:* {% optionalTail %} -word +word -> word wordSafeChar {% concat %} | wordStartChar {% id %} wordSafeChar -> wordStartChar {% id %} | [0-9] {% ([tok]) => tok.value %} - + wordStartChar -> [a-zA-Z$_] {% ([tok]) => tok.value %} - + dqString -> "\"" _string "\"" {% function(d) {return d[1]; } %} @@ -380,7 +380,7 @@ escapedString _string -> null {% function() {return ""; } %} | _string _stringchar {% ([lhs, rhs]) => lhs + rhs %} - + _stringchar -> [^\\"] {% id %} | "\\" [^] {% concat %} @@ -393,7 +393,7 @@ urlSafePlusEncodedChars -> "%" hexDigit hexDigit {% reduce %} | "&" "a" "m" "p" ";" {% reduce %} | urlSafeChar {% id %} - + hexDigit -> [0-9a-fA-F] {% id %} urlSafe @@ -414,8 +414,8 @@ _escapedString -> _escapedString escapedChar {% concat %} | escapedChar {% id %} -escapedChar - -> %space {% ([tok]) => tok.value %} +escapedChar + -> %space {% ([tok]) => tok.value %} | %any {% ([tok]) => tok.value %} # notation modes @@ -467,15 +467,15 @@ const doDedent = (ruleMap, indent, nextIndent) => { function* indented(lexer, source, info) { let iter = peekable(lexer.reset(source, info)) - let stack = [] + let stack = [] let ruleMap = new Map(); - + // absorb initial blank lines and indentation let indent = iter.nextIndent() yield makeSof(); yield makeSol(indent); - + for (let tok; tok = iter.next(); ) { if (tok.type === 'eol' || tok.type === 'startRule') { const newIndent = iter.nextIndent() @@ -535,13 +535,13 @@ function* indented(lexer, source, info) { indent = nextIndent; } - yield makeEol(); + yield makeEol(); const ruleToken = ruleMap.get(0); if (ruleToken) { yield makeToken('stopRule', `/${ruleToken.text}`); ruleMap.delete(0) } - + yield makeEof(); } @@ -593,7 +593,7 @@ const rules = { //urlReserved: /[;/?:@=&]/, //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, //chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, - any: /[^\s]/ + any: /[^\s]/ }; const printToken = (t) 
=> { @@ -648,7 +648,7 @@ StreamLexer.prototype.getTokenTypes = function(source) { StreamLexer.prototype.reset = function(source, info) { console.log('tokens', this.getTokenTypes(source)) this.generator = indented(this.lexer, source, info); -} +} StreamLexer.prototype.formatError = function(token) { return this.lexer.formatError(token); @@ -766,4 +766,4 @@ function map2Object(map){ return object; } -%} \ No newline at end of file +%} From 22abfb775fd1f87c0cac6a6c074504fdd23507b1 Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Fri, 22 Mar 2019 16:52:03 -0700 Subject: [PATCH 10/12] grammar --- src/parsers/moss.ne | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index 61860c2..8bd38e3 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -5,7 +5,7 @@ start rootScope -> map {% id %} - | (sol eol "string") multilineString ("\/string") {% ([sol, scope]) => scope %} + | (sol eol "text") multilineString ("\/text") {% ([sol, scope]) => scope %} | (sol eol "list") list ("\/list") {% ([sol, scope]) => scope %} scope @@ -30,7 +30,7 @@ map } %} mapPairConstructor - # nested explicitly declared list + # list -> key ((space constraintMap) | space) (eol "list" indent) list popScope "\/list" {% ([key, context, mode, scope]) => { if (context){ @@ -40,8 +40,8 @@ mapPairConstructor } } %} - # nested explicitly declared multiline string - | key ((space constraintMap) | space) (eol "string" indent) multilineString popScope "\/string" + # multiline string + | key ((space constraintMap) | space) (eol "text" indent) multilineString popScope "\/text" {% ([key, context, mode, scope]) => { if (context){ return [key, scope, {multiLineString: true, ...context[1]}] @@ -50,24 +50,26 @@ mapPairConstructor } } %} - # nested constrained scope + # nested map | key pushTypedScope scope popScope {% ([key, context, scope]) => { return [key, scope] } %} - # explicit map pair, rhs is a nested map - | key ((space constraintMap) | space) "{" nestedScope sol "}" endLine - {% ([directive, bracket, scope]) => scope %} - - # explicit map pair, rhs is a map - | key ((space constraintMap) | space) "{" scope endLine + # map + | key ((space constraintMap) | space) "{" scope (endLine | (space "}")) {% ([key, context, bracket, scope]) => { return [key, scope] } %} - - # default map pair, rhs is a statement - | key (space constraintMap):? 
statement mapTerminator + + # map + | key ((space constraintMap) | space) "[" list (endLine | (space "]")) + {% ([key, context, bracket, scope]) => { + return [key, scope] + } %} + + # statement + | key ((space constraintMap) | space) statement mapTerminator {% ([key, context, statement]) => { return [key, statement] } %} @@ -184,7 +186,7 @@ constraintMap } %} constraint - -> "@" "{" nestedScope sol "}" endLine + -> "@" "{" nestedScope sol "}" (space | endLine) {% ([directive, bracket, scope]) => scope %} | "@" literal "{" scope (space | endLine) {% ([directive, literal, bracket, scope]) => [literal, scope] %} @@ -193,7 +195,6 @@ constraint }%} # Map - key -> (sol | space) keyExpression ":" {% ([pre, key]) => key %} @@ -277,8 +278,7 @@ digit # Words literal - -> escapedString {% id %} - | dqString {% id %} + -> string {% id %} | singleWord {% id %} | uri {% id %} | number {% id %} @@ -371,10 +371,7 @@ wordSafeChar wordStartChar -> [a-zA-Z$_] {% ([tok]) => tok.value %} -dqString - -> "\"" _string "\"" {% function(d) {return d[1]; } %} - -escapedString +string -> "`" _escapedString "`" {% function(d) {return d[1]; } %} _string From cc00e91b79ad51e5d4ea74a40c59c9aced5f165d Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Sat, 23 Mar 2019 03:42:37 -0700 Subject: [PATCH 11/12] nested lists --- src/parsers/moss.ne | 214 +++++++++++++++++++++++++------------------- 1 file changed, 122 insertions(+), 92 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index 73cfd4f..543a898 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -2,36 +2,69 @@ start -> sof rootScope eof {% ([sof, scope]) => scope %} - + rootScope -> map {% id %} | (sol eol "string") multilineString ("\/string") {% ([sol, scope]) => scope %} - | (sol eol "list") list ("\/list") {% ([sol, scope]) => scope %} - + scope - -> map {% id %} + -> map {% ([layer]) => layer.data %} map -> map mapPairConstructor {% ([_layer, nextMatch]) => { - const layer = new Map(_layer) + const layer = { + data: new Map(_layer.data), + context: {} + } if (nextMatch && (nextMatch[0] !== undefined)) { - addPairToMap(nextMatch, layer) + addPairToMap(nextMatch, layer.data) + } + return layer; + } %} + | map mapList {% ([_layer, list]) => { + const layer = { + data: new Map(_layer.data), + context: {} + } + if (list && list.length) { + for (let i = 0; i < list.length; i++){ + addPairToMap([i, list[i]], layer.data) + } } return layer; } %} | mapPairConstructor {% ([initialMatch]) => { - const layer = new Map(); + const layer = { + data: new Map(), + context: {} + } if (initialMatch && (initialMatch[0] !== undefined)) { - addPairToMap(initialMatch, layer) + addPairToMap(initialMatch, layer.data) + } + return layer; + } %} + | mapList + {% ([list]) => { + const layer = { + data: new Map(), + context: {} + } + if (list && list.length) { + for (let i = 0; i < list.length; i++){ + addPairToMap([i, list[i]], layer.data) + } } return layer; } %} +mapList + -> (sol "-<" endLine) list "\/-<" {% ([prefix, list]) => list %} + mapPairConstructor # nested explicitly declared list - -> key ((space constraintMap) | space) (eol "list" indent) list popScope "\/list" + -> key ((space constraintMap) | space) ("-<" pushScope) list "\/-<" popScope {% ([key, context, mode, scope]) => { if (context){ return [key, scope, {multiLineString: true, ...context[1]}] @@ -56,26 +89,22 @@ mapPairConstructor return [key, scope] } %} - # explicit map pair, rhs is a nested map - | "@no" key ((space constraintMap) | space) "{" nestedScope sol "}" endLine - {% 
([directive, bracket, scope]) => scope %} - # explicit map pair, rhs is a map - | key ((space constraintMap) | space) "{" scope endLine + | key ((space constraintMap) | space) "{" scope "}" endLine {% ([key, context, bracket, scope]) => { return [key, scope] } %} # default map pair, rhs is a statement - | key (space constraintMap):? space statement mapTerminator - {% ([key, context, space, statement]) => { + | key ((space constraintMap) | space) statement mapTerminator + {% ([key, context, statement]) => { console.log('pair', [key, statement]) return [key, statement] } %} # default simple value - | (sol | space) statement mapTerminator - {% ([prefix, statement]) => { + | (sol | space) (constraintMap):? statement mapTerminator + {% ([prefix, constraintMap, statement]) => { return [statement, true] }%} @@ -86,48 +115,51 @@ mapPairConstructor {% expectedScopeOperator %} mapTerminator - -> ((space "}") | "," | endLine) {% id %} - + -> (" " | "," | endLine) {% id %} + +listTerminator + -> ("," | endLine) {% id %} + + list - -> list mapPairConstructor + -> list listConstructor {% ([array, item]) => { if (item){ - if (item[1] === true) array.push(item[0]); - else { - const map = new Map(); - map.set(item[0], item[1]); - array.push(map); - } + return [...array, item]; } return array; } %} - | mapPairConstructor + | listConstructor {% ([item]) => { - if (item[1] === true) return [item[0]]; - else { - const map = new Map(); - map.set(item[0], item[1]); - return map; - } + return [ item ]; } %} listConstructor - -> ( sol ) statement endLine - {% ([key, statement]) => { - return statement - } %} - | ( sol ) (space _mls) pushTypedScope multilineString popScope - {% ([key, keyMode, scopeConstaints, indent, scope]) => { - return scope + # nested constrained scope + -> key pushTypedScope scope popScope + {% ([key, context, scope]) => { + return scope } %} - | ( sol ) pushTypedScope scope popScope - {% ([key, scopeConstaints, indent, scope]) => { - return scope - } %} + | key ((space constraintMap) | space) "{" scope "}" endLine + {% ([key, context, bracket, scope]) => { + return scope + } %} + + # default map pair, rhs is a statement + | key ((space constraintMap) | space) statement listTerminator + {% ([key, context, statement]) => { + return statement + } %} + + # default simple value + | (sol | space) (constraintMap):? statement listTerminator + {% ([prefix, constraintMap, statement]) => { + return statement + }%} + | sol eol {% () => null %} - - + | sol comment {% () => null %} multilineString -> stringLine stringLine:* {% ([head, tail]) => { @@ -186,7 +218,7 @@ constraintMap constraint -> "@" "{" nestedScope sol "}" endLine {% ([directive, bracket, scope]) => scope %} - | "@" literal "{" scope (space | endLine) + | "@" literal "{" scope "}" (space | endLine) {% ([directive, literal, bracket, scope]) => [literal, scope] %} | "@" literal (space | endLine) {% ([directive, property]) => { return [property, true] @@ -242,10 +274,10 @@ nestedScope -> pushScope scope popScope {% ([push, scope]) => scope %} pushScope - -> (inlineComment | eol) indent {% () => null %} + -> (inlineComment | eol) indent {% id %} popScope - -> dedent {% () => null %} + -> dedent {% id %} endLine -> inlineComment {% id %} @@ -255,7 +287,7 @@ inlineComment -> space comment {% id %} comment - -> "#" _escapedString:? eol {% ([operator, comment]) => (comment) %} + -> "/" "/" _escapedString:? 
%eol {% ([operator, comment]) => (comment) %} # Numbers @@ -418,17 +450,11 @@ escapedChar -> %space {% ([tok]) => tok.value %} | %any {% ([tok]) => tok.value %} -# notation modes -_aa -> "@" "m" "a" "p" {% reduce %} -_ordered -> "@" "l" "i" "s" "t" {% reduce %} -#_ordered -> "@" "list" {% id %} -_mls -> "@" "s" "t" "r" "i" "n" "g" {% reduce %} - # syntactic whitespace sof -> %sof {% ([tok]) => tok.value %} eof -> %eof {% ([tok]) => tok.value %} sol -> %sol {% ([tok]) => tok %} -eol -> %eol {% ([tok]) => tok %} +eol -> _ %eol {% ([ws, tok]) => tok %} indent -> %indent {% ([tok]) => tok %} dedent @@ -440,28 +466,35 @@ _ -> _ space {% ([e]) => { return e ? e + ' ': ''; } %} - | null + | null {% () => '' %} @{% // Lexer -const makeToken = (type, text, indent) => ({type, text, value: text, indent, toString: () => text}); -const makeEol = (indent) => makeToken('eol', '\n'); -const makeEof = () => makeToken('eof', 'eof'); -const makeSol = (indent) => makeToken('sol', '\n', indent) -const makeIndent = (indent) => makeToken('indent', 'indent', indent) -const makeDedent = (indent) => makeToken('dedent', 'dedent', indent) +const makeToken = (type, text, sourceMap, indent) => ({...sourceMap, type, text, value: text, indent, toString: () => text}); + +const makeSol = (sourceMap, indent) => { + const t = makeToken('sol', '\n', sourceMap, indent); + //console.log(t); + return t +} +const makeEol = (sourceMap, indent) => makeToken('eol', '\n', sourceMap, indent) + +const makeIndent = (sourceMap, indent) => makeToken('indent', 'indent', sourceMap, indent) +const makeDedent = (sourceMap, indent) => makeToken('dedent', 'dedent', sourceMap, indent) + const makeSof = () => makeToken('sof', 'sof'); +const makeEof = () => makeToken('eof', 'eof'); -const doDedent = (ruleMap, indent, nextIndent) => { - const tokens = [makeEol()]; - tokens.push(makeDedent(nextIndent)); +const doDedent = (ruleMap, indent, nextIndent, sourceMap) => { + const tokens = [makeEol(sourceMap, indent)]; const ruleToken = ruleMap.get(indent); if (ruleToken) { - tokens.push(makeToken('stopRule', `/${ruleToken.text}`)); + tokens.push(makeToken('stopRule', `/${ruleToken.text}`, sourceMap, indent)); ruleMap.delete(indent) } - tokens.push(makeSol(nextIndent)); + tokens.push(makeDedent(sourceMap, nextIndent)); + tokens.push(makeSol(sourceMap, nextIndent)); return tokens; } @@ -477,36 +510,36 @@ function* indented(lexer, source, info) { yield makeSol(indent); for (let tok; tok = iter.next(); ) { + const sourceMap = {line: tok.line, col: tok.col}; + if (tok.type === 'eol' || tok.type === 'startRule') { const newIndent = iter.nextIndent() if (newIndent == null) { break; }// eof else if (newIndent === indent) { - yield makeEol(); if (tok.type === 'startRule'){ - if (indent === 0){ - const ruleToken = makeToken('startRule', tok.text.split('<')[0]); - ruleMap.set(indent, ruleToken); - yield ruleToken; - } + const ruleToken = makeToken('startRule', tok.text.slice(0, tok.text.indexOf('<') + 1)); + ruleMap.set(indent, ruleToken); + yield ruleToken; } - yield makeSol(indent); + yield makeEol(indent, sourceMap); + yield makeSol(sourceMap, indent); } else if (newIndent > indent) { stack.push(indent) - yield makeEol(); indent = newIndent if (tok.type === 'startRule'){ - const ruleToken = makeToken('startRule', tok.text.split('<')[0]); + const ruleToken = makeToken('startRule', tok.text.slice(0, tok.text.indexOf('<') + 1)); ruleMap.set(indent, ruleToken); yield ruleToken; } - yield makeIndent(indent) - yield makeSol(indent); + yield makeEol(sourceMap, indent); 
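+          // going deeper: having closed the current line above, open the new
+          // block with an indent token and a fresh sol, both carrying sourceMap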
+ yield makeIndent(sourceMap, indent) + yield makeSol(sourceMap, indent); } else if (newIndent < indent){ while (newIndent < indent) { const nextIndent = stack.pop(); - const dedentTokens = doDedent(ruleMap, indent, nextIndent); + const dedentTokens = doDedent(ruleMap, indent, nextIndent, sourceMap); for (const t of dedentTokens){ yield t; } @@ -516,8 +549,8 @@ function* indented(lexer, source, info) { throw new Error(`inconsistent indentation ${newIndent} != ${indent}`) } } else { - yield makeEol(); - yield makeSol(indent); + yield makeEol(sourceMap, indent); + yield makeSol(sourceMap, indent); } indent = newIndent } else { @@ -528,7 +561,7 @@ function* indented(lexer, source, info) { // dedent remaining blocks at eof for (let i = stack.length; i--;) { const nextIndent = stack.pop() || 0; - const dedentTokens = doDedent(ruleMap, indent, nextIndent); + const dedentTokens = doDedent(ruleMap, indent, nextIndent, {line: 'eof', col: 'eof'}); for (const t of dedentTokens){ yield t; } @@ -585,14 +618,11 @@ function peekable(lexer) { const rules = { space: /[ ]/, - startRule: {match: /[a-zA-Z+\-]+<[\n\r]/, lineBreaks: true }, + startRule: { + match: /[a-zA-Z+\-`]+<[\n\r]|[a-zA-Z+\-`]+< \/\/.*[\n\r]/, + lineBreaks: true + }, eol: {match: /[\n\r]/, lineBreaks: true }, - //word: /[a-zA-Z$_][a-zA-Z0-9$_]*/, - //number: /[0-9]/, - //urlUnsafe: /["<>#%{}|\\^~[]`]/, - //urlReserved: /[;/?:@=&]/, - //urlSafe: /[0-9a-zA-Z$\-_.+!*'()]/, - //chunk: /[a-zA-Z0-9\+\-*\?\|\/\()\\:]/, any: /[^\s]/ }; From de7a7c27ec58ab1427742076f63edb3a71990dbb Mon Sep 17 00:00:00 2001 From: Leif Shackelford Date: Sat, 23 Mar 2019 18:38:16 -0700 Subject: [PATCH 12/12] continue climbing up uri mountain --- src/parsers/moss.ne | 163 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 126 insertions(+), 37 deletions(-) diff --git a/src/parsers/moss.ne b/src/parsers/moss.ne index edb676d..a12cf4e 100644 --- a/src/parsers/moss.ne +++ b/src/parsers/moss.ne @@ -317,64 +317,145 @@ literal # URL = scheme:[//authority]path[?query][#fragment] uri -> url {% id %} - | urx {% id %} + | authority {% id %} url - -> urlScheme urx {% reduce %} + -> urlDomainScheme authority {% reduce %} + | urlScheme uriPathComponent {% reduce %} + | urlScheme urlPath {% reduce %} + +urlDomainScheme + -> urlScheme "/" "/" {% reduce %} + +urlSchemes + -> urlSchemes urlScheme {% reduce %} + | urlScheme {% id %} urlScheme - -> urlSafe ":" "/" "/" {% reduce %} + -> domainComponent ":" {% reduce %} + +authority + -> urlCredentials "@" _authority {% reduce %} + | _authority {% reduce %} -urx - -> urlCredentials "@" urd {% reduce %} - | urd {% reduce %} +_authority + -> uriDomainComponent uriPathComponent:? uriQueries:? uriFragment:? {% reduce %} -urd - -> tld urlPath:? uriQuery:? {% reduce %} +uriQueries + -> uriQueries uriQuery {% reduce %} + | uriQuery {% id %} + +uriPathComponent + -> "/" urlPath {% reduce %} + | "/" {% ([tok]) => tok.value %} urlCredentials - -> emailCredentials {% id %} - | userCredentials {% id %} + -> urlCredentials ":" password {% reduce %} + | email {% id %} + | subdomain {% id %} urlPath - -> "/" relativePath {% concat %} + -> urlPath "/" urlPathName {% reduce %} + | urlPath "/" {% reduce %} + | urlPathName {% id %} -relativePath -> - relativePath "/" fileName {% reduce %} +urlPathName -> + urlPathName "." 
urlPathWord {% reduce %} + | urlPathWord {% id %} + +urlPathWord + -> urlPathWord urlPathChar {% reduce %} + | urlPathChar {% id %} + +urlPathChar + -> [^ ^/^.^?^;] {% ([tok]) => tok.value %} + +filePath -> + filePath "/" fileName {% reduce %} | fileName {% id %} fileName -> - fileName "." word {% reduce %} - | word {% id %} - -pathElement - -> pathElement "/" urlSafe {% reduce %} - | urlSafe {% id %} + fileName "." fileWord {% reduce %} + | fileWord {% id %} -emailCredentials - -> emailCredentials ":" password {% reduce %} - | email {% reduce %} +fileWord + -> fileWord fileChar {% reduce %} + | fileChar {% id %} -userCredentials - -> userCredentials ":" password {% reduce %} - | urlSafe {% reduce %} +fileChar + -> [^ ^/^.] {% ([tok]) => tok.value %} password -> urlSafePlusEncoded {% reduce %} email - -> emailLhs "@" tld {% reduce %} + -> subdomain "@" domain {% reduce %} + +uriDomainComponent + -> uriDomainComponent uriPortComponent {% reduce %} + | domain {% reduce %} + | "[" ipv6 "]" {% reduce %} + | ipv4 {% id %} + +matchSeven[x] + -> $x $x $x $x $x $x $x {% reduce %} + +matchOneToSeven[x] + -> $x $x $x $x $x $x $x {% reduce %} + | $x $x $x $x $x $x {% reduce %} + | $x $x $x $x $x {% reduce %} + | $x $x $x $x {% reduce %} + | $x $x $x $x {% reduce %} + | $x $x $x {% reduce %} + | $x $x {% reduce %} + | $x {% reduce %} + +ipv6 + -> matchSeven[ipv6Group] ipv6Number {% reduce %} + | matchOneToSeven[ipv6Group] ":" ipv6Number {% reduce %} + +matchOneToFour[x] + -> $x $x $x $x {% reduce %} + | $x $x $x {% reduce %} + | $x $x {% reduce %} + | $x {% reduce %} + +ipv6Group + -> ipv6Number ":" {% reduce %} -emailLhs - -> urlSafe "." emailLhs {% id %} - | urlSafe {% id %} +ipv6Number + -> matchOneToFour[hexDigit] -tld - -> domain "." urlSafe {% reduce %} +ipv4 + -> ipv4Group "." ipv4Group "." ipv4Group "." ipv4Group -domain -> - urlSafe "." domain {% reduce %} - | urlSafe {% id %} +ipv4Group + -> d2 d5 d0_5 {% reduce %} + | d2 d0_4 d0_9 {% reduce %} + | d1 d0_9 d0_9 {% reduce %} + | d0_9 d0_9 {% reduce %} + | d0_9 {% id %} + +d1 -> "1" {% ([tok]) => tok %} +d2 -> "2" {% ([tok]) => tok %} +d5 -> "5" {% ([tok]) => tok %} +d0_4 -> [0-4] {% ([tok]) => tok %} +d0_5 -> [0-5] {% ([tok]) => tok %} +d0_9 -> [0-9] {% ([tok]) => tok %} + +domain + -> subdomain "." domainComponent {% reduce %} + +uriPortComponent + -> ":" number {% reduce %} + +subdomain -> + domainComponent "." subdomain {% reduce %} + | domainComponent {% id %} + +# ! $ & ' ( ) * + , ; = +# are permitted by generic URI syntax to be used unencoded +# in the user information, host, and path as delimiters. uriQuery -> "?" queryList {% reduce %} @@ -387,8 +468,15 @@ queryFragment -> queryFragment "=" urlSafePlusEncoded {% reduce %} | urlSafePlusEncoded {% id %} +uriFragment + -> "#" queryList {% reduce %} + +domainComponent + -> [a-zA-Z] [a-zA-Z0-9\-]:* + {% optionalTail %} + singleWord - -> [a-zA-Z$_] [a-zA-Z$_0-9]:* + -> [a-zA-Z$_] [a-zA-Z0-9$_]:* {% optionalTail %} word @@ -428,8 +516,9 @@ urlSafe -> urlSafe urlSafeChar {% concat %} | urlSafeChar {% id %} -# [0-9a-zA-Z$\-_.+!*'()] but we skip the dot as it is meaningfully parsed in rules -urlSafeChar -> [0-9a-zA-Z$\-_+!*'()] {% ([tok]) => tok.value %} +urlSafeChar -> [a-zA-Z0-9\-] {% ([tok]) => tok.value %} + + chunk -> chunk chunkChar {% concat %}
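
A rough usage sketch of the grammar added in this series, for orientation only: it assumes moss.ne is compiled with nearleyc to src/parsers/grammar.js and that the small Moss document below is representative input; the output path, the sample input, and the expected shape of the result are illustrative assumptions, not part of the patch.

    // sketch (plain node): assumes `nearleyc src/parsers/moss.ne -o src/parsers/grammar.js`
    const nearley = require('nearley');
    const grammar = require('./parsers/grammar');

    // hypothetical Moss input: an indentation-delimited map with two scalar pairs
    const source = 'config:\n  host: example.com\n  port: 8080\n';

    // standard nearley driving code: build a parser from the compiled rules and feed it
    const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
    parser.feed(source);

    // parser.results holds one entry per successful parse; more than one entry
    // would indicate the rules above matched this input ambiguously
    console.log(parser.results[0]);

A single entry in parser.results is the shape to aim for once the indentation lexer and the map/list rules agree; multiple entries are the quickest signal of ambiguity while iterating on the grammar.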