From 9db31ea49d2d1a1f92062210fd36a62e878e1af1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 03:14:37 +0000 Subject: [PATCH 1/8] docs(plan): double-quoted strings sugar for [ '...' ] Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 41 ++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 38a4728..23133ca 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -2,13 +2,16 @@ ## Overview -Double-quoted strings (e.g. `"hello"`) are compiler sugar for a **quote that pushes character codes**. They are first-class values — a single pointer on the stack, like any other quote. +Double-quoted strings (e.g. `"hello"`) are **compiler sugar for a quote that contains a single single-quoted string literal**. The double-quote form wraps the same text in `[ ... ]` with one `'...'` inside. ``` -"hi" ≡ [ 'h' 'i' ] ≡ [ 104 105 ] +"xyz" ≡ [ 'xyz' ] +"hi" ≡ [ 'hi' ] ``` -The empty string is `0` — the same `0` that already means NOP/nil throughout the language. This gives strings a natural null terminator with no new machinery. +So `"hi"` is **not** sugar for `[ 'h' 'i' ]` (a quote of two character literals); it is exactly one quoted string token inside an outer quote. How the inner `'...'` is represented internally (e.g. cons chain vs literal) is specified below; the sugar rule is purely syntactic. + +The empty string is `0` — the same `0` that already means NOP/nil throughout the language. This gives strings a natural null terminator with no new machinery. The sugar form for an empty double-quoted string is consistent with the rule: `""` ≡ `[ '' ]` (if the lexer accepts empty single-quoted strings); implementations may still normalize or relate that to `0` as the chain terminator. --- @@ -20,11 +23,13 @@ Strings are built from **cons cells**, directly analogous to Lisp. 
Each cons cel x y cons → ptr, body: [ PUSH x, CALL y ] ``` -A string is a linked chain of cons cells terminating at `0`: +A string is a linked chain of cons cells terminating at `0`. The **inner** single-quoted literal `'hi'` has that shape; double-quoted `"hi"` is sugar for a **quote** whose body is that literal: ``` -"hi" → ptrH, body: [ PUSH 104, CALL ptrI ] - ptrI, body: [ PUSH 105, CALL 0 ] +"hi" ≡ [ 'hi' ] /* outer: one quote value on the stack */ + +'hi' (runtime) → ptrH, body: [ PUSH 104, CALL ptrI ] + ptrI, body: [ PUSH 105, CALL 0 ] ``` Eval'ing the head pointer walks the chain, pushing each character code in order. @@ -63,16 +68,30 @@ The plan originally called this `concat`, but the implementation uses `compose` ## Compiler Sugar +**Double quotes → quote + single-quoted string** + +``` +"hello" ≡ [ 'hello' ] +``` + +The compiler may implement this by emitting the same AST/bytecode as for `[ 'hello' ]` directly, or by lowering later (e.g. to a cons chain) — but the **authoring rule** is: `"..."` is sugar for wrapping the corresponding `'...'` in an outer `[ ... ]`. + +**Previous plan (superseded for `"..."` syntax):** desugaring `"hello"` straight to a `swons` chain at the surface: + ``` -"hello" → desugared at compile time to 0 'o' swons 'l' swons 'l' swons 'e' swons 'h' swons +0 'o' swons 'l' swons 'l' swons 'e' swons 'h' swons ``` -So eval'ing the result pushes characters left to order: `h`, `e`, `l`, `l`, `o`. +That chain remains a valid **manual** way to build the same runtime string shape; it is no longer the specified meaning of the `"hello"` token. + +Eval'ing the result of `[ 'hello' ]` (whether written with double or single quotes inside brackets) should still push character codes in order when that is how single-quoted string literals behave in the VM. + +**Status: ❌ NOT IMPLEMENTED** — No implementation currently desugars `"..."` this way. 
+Users must currently write an explicit quote around a single-quoted string, or construct strings with `0` and `cons`/`swons`: -**Status: ❌ NOT IMPLEMENTED** - No implementation currently desugars `"..."` syntax. -Users must manually construct strings using `0` and `cons`/`swons`: ``` -0 'i' swons 'h' swons /* creates "hi" */ +[ 'hi' ] /* equivalent target for future "hi" sugar */ +0 'i' swons 'h' swons /* manual cons chain for "hi" */ ``` --- From 837ae1c1d5cf1529550e003772359c1396057768 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 03:25:20 +0000 Subject: [PATCH 2/8] docs(plan): chain single-quote desugar, escaping, no 0 on double-quote path Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 68 +++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 23133ca..4fe1819 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -2,16 +2,23 @@ ## Overview -Double-quoted strings (e.g. `"hello"`) are **compiler sugar for a quote that contains a single single-quoted string literal**. The double-quote form wraps the same text in `[ ... ]` with one `'...'` inside. +**Double-quoted** strings (e.g. `"hello"`) are compiler sugar for a **quote that contains a single single-quoted string literal**: the same characters appear inside `[ ... ]` as one `'...'` token. + +**Single-quoted** strings (e.g. `'hi'`) are themselves sugar for **pushing each character as its own literal** (character codes / integers) in sequence. + +Chaining those rules, all of the following are equivalent (same quote body after full desugar): ``` -"xyz" ≡ [ 'xyz' ] -"hi" ≡ [ 'hi' ] +"hi" ≡ [ 'hi' ] ≡ [ 'h' 'i' ] ≡ [ 104 105 ] ``` -So `"hi"` is **not** sugar for `[ 'h' 'i' ]` (a quote of two character literals); it is exactly one quoted string token inside an outer quote. How the inner `'...'` is represented internally (e.g. 
cons chain vs literal) is specified below; the sugar rule is purely syntactic. +The **first** step for double quotes is still syntactic: `"hi"` becomes `[ 'hi' ]` — one string token inside the brackets, not two tokens `'h'` `'i'` at that stage. The equivalence to `[ 'h' 'i' ]` and `[ 104 105 ]` follows from how **single**-quoted strings desugar. + +**Escaping** rules are unchanged between single- and double-quoted forms: whatever escapes apply inside `'...'` apply inside the text of `"..."` as well (the lexer/parser treats the payload the same; only the outer delimiters differ). -The empty string is `0` — the same `0` that already means NOP/nil throughout the language. This gives strings a natural null terminator with no new machinery. The sugar form for an empty double-quoted string is consistent with the rule: `""` ≡ `[ '' ]` (if the lexer accepts empty single-quoted strings); implementations may still normalize or relate that to `0` as the chain terminator. +**No `0` on the double-quoted path:** desugaring `"..."` to `[ '...' ]` (and then to per-character pushes) does **not** add a `0` prefix or suffix. That is distinct from manually building a **nil-terminated cons chain** with `0 ... swons`, where `0` is the tail of the list — see Internal Representation. + +The empty string: `""` ≡ `[ '' ]`, which desugars to an empty quote body `[]` (no pushes). Separately, **`0`** remains the language’s NOP/nil and the **terminator** of cons-chain string values when constructed with `cons`/`swons`; it is not inserted by the double-quote sugar itself. --- @@ -23,17 +30,17 @@ Strings are built from **cons cells**, directly analogous to Lisp. Each cons cel x y cons → ptr, body: [ PUSH x, CALL y ] ``` -A string is a linked chain of cons cells terminating at `0`. The **inner** single-quoted literal `'hi'` has that shape; double-quoted `"hi"` is sugar for a **quote** whose body is that literal: +A **cons-chain string value** (what you hold on the stack after `0 ... 
swons`) is a linked list of cons cells terminating at `0`: ``` -"hi" ≡ [ 'hi' ] /* outer: one quote value on the stack */ - -'hi' (runtime) → ptrH, body: [ PUSH 104, CALL ptrI ] - ptrI, body: [ PUSH 105, CALL 0 ] +ptrH, body: [ PUSH 104, CALL ptrI ] +ptrI, body: [ PUSH 105, CALL 0 ] ``` Eval'ing the head pointer walks the chain, pushing each character code in order. +That shape is **not** the same token sequence as `"hi"` or `[ 'hi' ]` / `[ 104 105 ]`. The latter are **quotes** whose body (after desugar) is a flat sequence of pushes. Building a cons chain is still done manually (or by library words) with `0` and `cons`/`swons`. Double-quoted sugar never prepends or appends `0` to the quote body. + --- ## Primitives (opcodes) @@ -68,30 +75,51 @@ The plan originally called this `concat`, but the implementation uses `compose` ## Compiler Sugar -**Double quotes → quote + single-quoted string** +### Double quotes → one single-quoted string inside a quote ``` "hello" ≡ [ 'hello' ] ``` -The compiler may implement this by emitting the same AST/bytecode as for `[ 'hello' ]` directly, or by lowering later (e.g. to a cons chain) — but the **authoring rule** is: `"..."` is sugar for wrapping the corresponding `'...'` in an outer `[ ... ]`. +No `0` is added before or after the content when applying this rule. + +### Single quotes → one push per character -**Previous plan (superseded for `"..."` syntax):** desugaring `"hello"` straight to a `swons` chain at the surface: +A single-quoted string literal desugars to the same sequence of single-character literals (and thus to integer pushes) inside a quote: ``` -0 'o' swons 'l' swons 'l' swons 'e' swons 'h' swons +'hello' ≡ 'h' 'e' 'l' 'l' 'o' /* inside a quote body */ +[ 'hello' ] ≡ [ 'h' 'e' 'l' 'l' 'o' ] ≡ [ 104 101 108 108 111 ] /* example codes */ ``` -That chain remains a valid **manual** way to build the same runtime string shape; it is no longer the specified meaning of the `"hello"` token. 
+**Escaping:** the escape grammar for `'...'` and `"..."` stays the same; only the delimiters differ at the first double-quote step. + +### Combined equivalence (example) + +``` +"hi" ≡ [ 'hi' ] ≡ [ 'h' 'i' ] ≡ [ 104 105 ] +``` + +The compiler may fuse steps internally (e.g. emit numeric pushes directly) as long as the result matches the above. + +### Cons-chain construction (unchanged, not the same as `"..."` desugar) + +Manual nil-terminated string **values** are still built with `0` and `swons`, for example: + +``` +0 'o' swons 'l' swons 'l' swons 'e' swons 'h' swons +``` -Eval'ing the result of `[ 'hello' ]` (whether written with double or single quotes inside brackets) should still push character codes in order when that is how single-quoted string literals behave in the VM. +That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ❌ NOT IMPLEMENTED** — No implementation currently desugars `"..."` this way. -Users must currently write an explicit quote around a single-quoted string, or construct strings with `0` and `cons`/`swons`: +**Status: ❌ NOT IMPLEMENTED** — No implementation currently desugars `"..."` as specified. 
+Users must currently write quotes and character/integer sequences explicitly, or build cons chains with `0` and `cons`/`swons`: ``` -[ 'hi' ] /* equivalent target for future "hi" sugar */ -0 'i' swons 'h' swons /* manual cons chain for "hi" */ +[ 'hi' ] /* long form; future "hi" sugar lands here first */ +[ 'h' 'i' ] +[ 104 105 ] +0 'i' swons 'h' swons /* cons-chain "hi", includes terminating 0 */ ``` --- From 9f984d778589d9d7e900bee711b191a8bd455cdc Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 03:48:19 +0000 Subject: [PATCH 3/8] feat(compiler): support double-quoted string literals in TS core Double-quoted tokens use the same unescapeString pipeline and per-character pushes as single-quoted literals (no implicit 0). Add Deno compile tests and a TAP .ffp suite under ff/lib/string/__tests__. Co-authored-by: Jayson Harshbarger --- deno/src/double_quote_strings_test.ts | 25 +++++++++++++++++++ .../__tests__/double-quoted-strings.test.ffp | 13 ++++++++++ typescript/core/src/compiler.ts | 14 ++++++++--- 3 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 deno/src/double_quote_strings_test.ts create mode 100644 ff/lib/string/__tests__/double-quoted-strings.test.ffp diff --git a/deno/src/double_quote_strings_test.ts b/deno/src/double_quote_strings_test.ts new file mode 100644 index 0000000..ffa9a1e --- /dev/null +++ b/deno/src/double_quote_strings_test.ts @@ -0,0 +1,25 @@ +import { assertEquals } from "std/assert/mod.ts"; + +import { Compiler } from "./compiler.ts"; +import { IROp } from "../../typescript/core/src/ir.ts"; + +function pushCodes(source: string): bigint[] { + const ir = new Compiler().compileToIR(Compiler.tokenize(source), "test.ff"); + return ir.filter((i) => i.op === IROp.push).map((i) => i.value); +} + +Deno.test("double-quoted strings compile like single-quoted (per-char pushes)", () => { + assertEquals(pushCodes('"hi"'), pushCodes("'hi'")); + assertEquals(pushCodes('"hi"'), [104n, 105n]); +}); + 
+Deno.test("double-quoted empty string emits no push literals", () => { + assertEquals(pushCodes('""'), []); +}); + +Deno.test("double-quoted strings use same escapes as single-quoted", () => { + assertEquals(pushCodes('"\\n"'), pushCodes("'\\n'")); + assertEquals(pushCodes('"\\n"'), [10n]); + assertEquals(pushCodes('"\\""'), [34n]); + assertEquals(pushCodes("'\\''"), [39n]); +}); diff --git a/ff/lib/string/__tests__/double-quoted-strings.test.ffp b/ff/lib/string/__tests__/double-quoted-strings.test.ffp new file mode 100644 index 0000000..bbd0812 --- /dev/null +++ b/ff/lib/string/__tests__/double-quoted-strings.test.ffp @@ -0,0 +1,13 @@ +.import ../string.ffp +.import ../../tap.ffp + +TAP-VERSION + +'\0Double-quoted\sstrings' SUBTEST + "hi" 'i' = swap 'h' = and OK + 42 dup "" = OK + '\n' "\n" = OK + '\"' "\"" = OK + 4 PLAN OK + +1 PLAN diff --git a/typescript/core/src/compiler.ts b/typescript/core/src/compiler.ts index e8dff12..2725555 100644 --- a/typescript/core/src/compiler.ts +++ b/typescript/core/src/compiler.ts @@ -197,12 +197,18 @@ export class Compiler { break; } } - } else if (ss[0] === "'" && ss.length > 1) { // String + } else if ( + (ss[0] === "'" || ss[0] === '"') && ss.length > 1 + ) { + // Single- and double-quoted string literals: same escapes (see strings.ts); + // each character becomes a separate push. Double quotes are sugar for the + // same per-character expansion as single quotes (no implicit 0). + const delim = ss[0] as "'" | '"'; unescapeString(ss) - .replace(/^'/, "") - .replace(/'$/, "") + .replace(delim === "'" ? /^'/ : /^"/, "") + .replace(delim === "'" ? /'$/ : /"$/, "") .split("") - .forEach(c => { + .forEach((c) => { push(c.charCodeAt(0), { comment: c }); }); } else if (ss.endsWith(":") && ss.length > 1) { // Definition From 366a09c7298071667c444efb02517e4ea5765b28 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 03:59:34 +0000 Subject: [PATCH 4/8] fix(compiler): double-quoted strings compile as [ '...' 
] quotations Emit BRA, per-character pushes (same as single-quoted), then KET so "hi" is a quotation; eval matches bare 'hi'. Accept standalone [ and ] tokens so [ 'hi' ] tokenizes like the sugar form. Update Deno IR tests, TAP tests (seq= vs [ 'hi' ], eval vs chars), and plan implementation status. Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 2 +- deno/src/double_quote_strings_test.ts | 47 ++++++++++++++----- .../__tests__/double-quoted-strings.test.ffp | 12 +++-- typescript/core/src/compiler.ts | 29 ++++++++---- 4 files changed, 63 insertions(+), 27 deletions(-) diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 4fe1819..45e25d6 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -112,7 +112,7 @@ Manual nil-terminated string **values** are still built with `0` and `swons`, fo That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ❌ NOT IMPLEMENTED** — No implementation currently desugars `"..."` as specified. +**Status: ✅ Implemented (TypeScript core compiler)** — `"..."` emits `BRA`, the same per-character pushes as `'...'`, then `KET`, matching tokenized `[ '...' ]`. Standalone `[` and `]` tokens are also accepted so that form parses as an explicit quotation. 
Users must currently write quotes and character/integer sequences explicitly, or build cons chains with `0` and `cons`/`swons`: ``` diff --git a/deno/src/double_quote_strings_test.ts b/deno/src/double_quote_strings_test.ts index ffa9a1e..b6cc660 100644 --- a/deno/src/double_quote_strings_test.ts +++ b/deno/src/double_quote_strings_test.ts @@ -2,24 +2,47 @@ import { assertEquals } from "std/assert/mod.ts"; import { Compiler } from "./compiler.ts"; import { IROp } from "../../typescript/core/src/ir.ts"; +import { OpCodes } from "../../typescript/core/src/opcodes.ts"; -function pushCodes(source: string): bigint[] { +function irSnapshot(source: string) { const ir = new Compiler().compileToIR(Compiler.tokenize(source), "test.ff"); - return ir.filter((i) => i.op === IROp.push).map((i) => i.value); + return ir.map((i) => ({ + op: i.op, + value: i.value, + })); } -Deno.test("double-quoted strings compile like single-quoted (per-char pushes)", () => { - assertEquals(pushCodes('"hi"'), pushCodes("'hi'")); - assertEquals(pushCodes('"hi"'), [104n, 105n]); +Deno.test('double-quoted string compiles like bracketed single-quoted', () => { + assertEquals( + irSnapshot('"hi"'), + irSnapshot("[ 'hi' ]"), + ); }); -Deno.test("double-quoted empty string emits no push literals", () => { - assertEquals(pushCodes('""'), []); +Deno.test("double-quoted string IR: BRA, character pushes, KET", () => { + const ir = new Compiler().compileToIR(Compiler.tokenize('"hi"'), "test.ff"); + assertEquals(ir.length, 4); + assertEquals(ir[0]?.op, IROp.call); + assertEquals(ir[0]?.value, BigInt(OpCodes.BRA)); + assertEquals(ir[1]?.op, IROp.push); + assertEquals(ir[1]?.value, 104n); + assertEquals(ir[2]?.op, IROp.push); + assertEquals(ir[2]?.value, 105n); + assertEquals(ir[3]?.op, IROp.call); + assertEquals(ir[3]?.value, BigInt(OpCodes.KET)); }); -Deno.test("double-quoted strings use same escapes as single-quoted", () => { - assertEquals(pushCodes('"\\n"'), pushCodes("'\\n'")); - 
assertEquals(pushCodes('"\\n"'), [10n]); - assertEquals(pushCodes('"\\""'), [34n]); - assertEquals(pushCodes("'\\''"), [39n]); +Deno.test('double-quoted empty string is empty quotation', () => { + const ir = new Compiler().compileToIR(Compiler.tokenize('""'), "test.ff"); + assertEquals(ir.length, 2); + assertEquals(ir[0]?.op, IROp.call); + assertEquals(ir[0]?.value, BigInt(OpCodes.BRA)); + assertEquals(ir[1]?.op, IROp.call); + assertEquals(ir[1]?.value, BigInt(OpCodes.KET)); +}); + +Deno.test("double-quoted escapes match [ '...' ] form", () => { + assertEquals(irSnapshot('"\\n"'), irSnapshot("[ '\\n' ]")); + // \" in double quotes is ASCII 34; '\'' in single quotes is ASCII 39 — same rules, different payload. + assertEquals(irSnapshot('"\\""'), irSnapshot("[ 34 ]")); }); diff --git a/ff/lib/string/__tests__/double-quoted-strings.test.ffp b/ff/lib/string/__tests__/double-quoted-strings.test.ffp index bbd0812..39f1acc 100644 --- a/ff/lib/string/__tests__/double-quoted-strings.test.ffp +++ b/ff/lib/string/__tests__/double-quoted-strings.test.ffp @@ -1,13 +1,15 @@ .import ../string.ffp +.import ../../seq/seq.ffp .import ../../tap.ffp TAP-VERSION '\0Double-quoted\sstrings' SUBTEST - "hi" 'i' = swap 'h' = and OK - 42 dup "" = OK - '\n' "\n" = OK - '\"' "\"" = OK - 4 PLAN OK + "hi" [ 'hi' ] seq= OK + "hi" eval 'i' = swap 'h' = and OK + "" [ ] seq= OK + '\n' "\n" eval = OK + '\"' "\"" eval = OK + 5 PLAN OK 1 PLAN diff --git a/typescript/core/src/compiler.ts b/typescript/core/src/compiler.ts index 2725555..945d794 100644 --- a/typescript/core/src/compiler.ts +++ b/typescript/core/src/compiler.ts @@ -197,20 +197,31 @@ export class Compiler { break; } } - } else if ( - (ss[0] === "'" || ss[0] === '"') && ss.length > 1 - ) { - // Single- and double-quoted string literals: same escapes (see strings.ts); - // each character becomes a separate push. Double quotes are sugar for the - // same per-character expansion as single quotes (no implicit 0). 
- const delim = ss[0] as "'" | '"'; + } else if (ss[0] === '"' && ss.length > 1) { + // Double-quoted strings: sugar for [ '...' ] — a quotation whose body is + // the same per-character pushes as a single-quoted literal (same escapes; + // no implicit 0). Eval'ing the quote matches running '...' at the call site. + call(OpCodes.BRA, { name: "[" }, currentToken); + const inner = unescapeString(ss) + .replace(/^"/, "") + .replace(/"$/, ""); + inner.split("").forEach((c) => { + push(c.charCodeAt(0), { comment: c }); + }); + call(OpCodes.KET, { name: "]" }, currentToken); + } else if (ss[0] === "'" && ss.length > 1) { + // Single-quoted string: one push per character (see strings.ts). unescapeString(ss) - .replace(delim === "'" ? /^'/ : /^"/, "") - .replace(delim === "'" ? /'$/ : /"$/, "") + .replace(/^'/, "") + .replace(/'$/, "") .split("") .forEach((c) => { push(c.charCodeAt(0), { comment: c }); }); + } else if (ss === "[") { + call(OpCodes.BRA, { name: "[" }, currentToken); + } else if (ss === "]") { + call(OpCodes.KET, { name: "]" }, currentToken); } else if (ss.endsWith(":") && ss.length > 1) { // Definition const name = ss.replace(/:$/, ""); push(this.getSymbol(name), { name: `${name}`, pointer: true }); From 39b4dd502a5c354f9621bf88590c6cfd25f50917 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 04:18:36 +0000 Subject: [PATCH 5/8] feat(go,racket): double-quoted strings as [ '...' ] quotations Go: compile "..." as BRA + per-char pushes + KET; accept [ and ] tokens. Add compiler tests mirroring TS behavior. Racket: lex double-quoted strings (STR-DQ), standalone [ ] (BRA/KET); share ff-unescape with single-quoted strings. Extend brag grammar and compiler/runner/expander macros for STR-DQ and brackets. Update string plan implementation status. 
Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 7 ++++--- go/src/compiler/compiler.go | 22 ++++++++++++++++++++ go/src/compiler/compiler_test.go | 31 ++++++++++++++++++++++++++++ racket/private/compiler.rkt | 14 ++++++++++++- racket/private/expander.rkt | 13 +++++++++++- racket/private/lexer.rkt | 11 +++++----- racket/private/parser.rkt | 5 ++++- racket/private/runner.rkt | 13 +++++++++++- racket/private/unescape.rkt | 35 ++++++++++++++++++++++++++++++++ 9 files changed, 139 insertions(+), 12 deletions(-) create mode 100644 racket/private/unescape.rkt diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 45e25d6..92768fb 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -112,11 +112,12 @@ Manual nil-terminated string **values** are still built with `0` and `swons`, fo That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ✅ Implemented (TypeScript core compiler)** — `"..."` emits `BRA`, the same per-character pushes as `'...'`, then `KET`, matching tokenized `[ '...' ]`. Standalone `[` and `]` tokens are also accepted so that form parses as an explicit quotation. -Users must currently write quotes and character/integer sequences explicitly, or build cons chains with `0` and `cons`/`swons`: +**Status: ✅ Implemented** — TypeScript core, **Go** (`go/src/compiler/compiler.go`), and **Racket** (`racket/private/lexer.rkt`, `parser.rkt`, `compiler.rkt`, `runner.rkt`, `expander.rkt`): `"..."` emits `BRA`, the same per-character pushes as `'...'`, then `KET`, matching tokenized `[ '...' ]`. Standalone `[` and `]` tokens are accepted in Go and Racket as well. Racket’s lexer now applies the same **ff-unescape** rules as Go/TS for both `'` and `"` payloads (`racket/private/unescape.rkt`). 
+ +Equivalent spellings and manual cons-chain construction: ``` -[ 'hi' ] /* long form; future "hi" sugar lands here first */ +[ 'hi' ] [ 'h' 'i' ] [ 104 105 ] 0 'i' swons 'h' swons /* cons-chain "hi", includes terminating 0 */ diff --git a/go/src/compiler/compiler.go b/go/src/compiler/compiler.go index c81825b..3a0c332 100644 --- a/go/src/compiler/compiler.go +++ b/go/src/compiler/compiler.go @@ -231,6 +231,28 @@ func compileToIR( } } else if strings.HasPrefix(element, "[") && strings.HasSuffix(element, "]") { push(getSymbol(element[1:len(element)-1]), element) + } else if element == "[" { + call(NewInt(OP_BRA), "[") + } else if element == "]" { + call(NewInt(OP_KET), "]") + } else if strings.HasPrefix(element, "\"") { + // Double-quoted: sugar for [ '...' ] — BRA, per-char pushes (same escapes as + // single-quoted), KET. No implicit 0. + l := 0 + if strings.HasSuffix(element, "\"") && len(element) > 1 { + l++ + } + s := convertEsc2Char(element[1 : len(element)-l]) + call(NewInt(OP_BRA), "[") + for i := 0; i < len(s); i++ { + v := NewInt(int64(s[i])) + if i == 0 { + push(v, s) + } else { + push(v, "") + } + } + call(NewInt(OP_KET), "]") } else if strings.HasPrefix(element, "'") { l := 0 if strings.HasSuffix(element, "'") { diff --git a/go/src/compiler/compiler_test.go b/go/src/compiler/compiler_test.go index 191a27e..2dd60a6 100644 --- a/go/src/compiler/compiler_test.go +++ b/go/src/compiler/compiler_test.go @@ -13,6 +13,37 @@ func resetCompilerStateForTest() { code = -1 } +func TestCompileDoubleQuotedStringAsQuotation(t *testing.T) { + resetCompilerStateForTest() + Setup() + + irDQ := CompileToIR(Tokenize(`"hi"`), "") + irBQ := CompileToIR(Tokenize("[ 'hi' ]"), "") + if len(irDQ) != len(irBQ) { + t.Fatalf("expected same IR length, got %d vs %d", len(irDQ), len(irBQ)) + } + for i := range irDQ { + if irDQ[i].op != irBQ[i].op || irDQ[i].value.Cmp(irBQ[i].value) != 0 { + t.Fatalf("IR mismatch at %d: %+v vs %+v", i, irDQ[i], irBQ[i]) + } + } + if len(irDQ) != 4 || 
irDQ[0].op != "call" || irDQ[0].value.Cmp(big.NewInt(OP_BRA)) != 0 || + irDQ[3].op != "call" || irDQ[3].value.Cmp(big.NewInt(OP_KET)) != 0 { + t.Fatalf("expected BRA push push KET, got %+v", irDQ) + } +} + +func TestCompileEmptyDoubleQuotedString(t *testing.T) { + resetCompilerStateForTest() + Setup() + + ir := CompileToIR(Tokenize(`""`), "") + if len(ir) != 2 || ir[0].op != "call" || ir[0].value.Cmp(big.NewInt(OP_BRA)) != 0 || + ir[1].op != "call" || ir[1].value.Cmp(big.NewInt(OP_KET)) != 0 { + t.Fatalf("expected empty quote BRA KET, got %+v", ir) + } +} + func TestCompileConsAsSystemWord(t *testing.T) { resetCompilerStateForTest() Setup() diff --git a/racket/private/compiler.rkt b/racket/private/compiler.rkt index 37cde53..e8c89da 100644 --- a/racket/private/compiler.rkt +++ b/racket/private/compiler.rkt @@ -28,4 +28,16 @@ (define cmds (flatten (map (lambda (x) (list x 0)) chars))) #`(list #,@cmds)) -(provide ff-program ff-marker ff-push ff-call ff-string) +;; Double-quoted: sugar for [ '...' ] — BRA, per-char pushes, KET (same escapes as STR). +(define-macro (ff-string-dq STR) + (define chars (map char->integer (string->list (syntax->datum #'STR)))) + (define cmds (flatten (map (lambda (x) (list x 0)) chars))) + #`(list op_bra 1 #,@cmds op_ket 1)) + +(define-macro (ff-bra . _) + #'(list op_bra 1)) + +(define-macro (ff-ket . _) + #'(list op_ket 1)) + +(provide ff-program ff-marker ff-push ff-call ff-string ff-string-dq ff-bra ff-ket) diff --git a/racket/private/expander.rkt b/racket/private/expander.rkt index b20e099..1bcfecd 100644 --- a/racket/private/expander.rkt +++ b/racket/private/expander.rkt @@ -57,4 +57,15 @@ (with-pattern ([(INTS ...) ints]) #`(begin (push INTS) ...))) -(provide ff-program ff-marker ff-push ff-call ff-string) +(define-macro (ff-string-dq STR) + (define ints (map char->integer (string->list (syntax->datum #'STR)))) + (with-pattern ([(INTS ...) ints]) + #`(begin (call op_bra) (push INTS) ... (call op_ket)))) + +(define-macro (ff-bra . 
_) + #'(call op_bra)) + +(define-macro (ff-ket . _) + #'(call op_ket)) + +(provide ff-program ff-marker ff-push ff-call ff-string ff-string-dq ff-bra ff-ket) diff --git a/racket/private/lexer.rkt b/racket/private/lexer.rkt index ef765e6..9471baf 100644 --- a/racket/private/lexer.rkt +++ b/racket/private/lexer.rkt @@ -1,7 +1,7 @@ #lang racket/base (require racket/string brag/support) -(require "ops.rkt" "symbols.rkt") +(require "ops.rkt" "symbols.rkt" "unescape.rkt") (define user_symbols (make-hash)) @@ -44,20 +44,21 @@ (define-lex-abbrev oct (:: (:or "-" "") "0o" (:+ (:or "_" "0" "1" "2" "3" "4" "5" "6" "7")))) (define-lex-abbrev comment (from/to "/*" "*/")) (define-lex-abbrev string (from/to "\'" "\'")) +(define-lex-abbrev string-dq (from/to "\"" "\"")) (define-lex-abbrev pointer (:: "[" id "]")) (define-lex-abbrev marker (:: id ":")) (define-lex-abbrev command (:: "." id)) -(define (unescape str) - (string-replace (string-replace (string-replace str "\\t" "\t") "\\s" " ") "\\n" "\n")) - (define ff-lexer (lexer [comment (token 'COMMENT lexeme #:skip? #t)] [whitespace (token 'WHITESPACE lexeme #:skip? #t)] [command (token 'COMMENT lexeme #:skip? 
#t)] - [string (token 'STR (unescape (string-trim lexeme "\'")))] + [string (token 'STR (ff-unescape (string-trim lexeme "\'")))] + [string-dq (token 'STR-DQ (ff-unescape (substring lexeme 1 (sub1 (string-length lexeme)))))] [pointer (token 'PUSH (lookup (trim-ends "[" lexeme "]")))] + ["[" (token 'BRA #\[)] + ["]" (token 'KET #\])] [marker (token 'MRKR (lookup (string-trim lexeme ":")))] [decimal (token 'PUSH (->number lexeme))] [bin (token 'PUSH (->number lexeme))] diff --git a/racket/private/parser.rkt b/racket/private/parser.rkt index ffb6dca..158e2ea 100644 --- a/racket/private/parser.rkt +++ b/racket/private/parser.rkt @@ -1,9 +1,12 @@ #lang brag -ff-program : (ff-call | ff-push | ff-string | ff-marker)* +ff-program : (ff-call | ff-push | ff-string | ff-string-dq | ff-marker | ff-bra | ff-ket)* ff-marker : MRKR ff-string : STR +ff-string-dq : STR-DQ +ff-bra : BRA +ff-ket : KET ff-call : CALL ff-push : PUSH \ No newline at end of file diff --git a/racket/private/runner.rkt b/racket/private/runner.rkt index 43f7784..9c6fa40 100644 --- a/racket/private/runner.rkt +++ b/racket/private/runner.rkt @@ -25,4 +25,15 @@ (define cmds (flatten (map (lambda (x) (list x 0)) chars))) #`(list #,@cmds)) -(provide ff-program ff-marker ff-push ff-call ff-string) +(define-macro (ff-string-dq STR) + (define chars (map char->integer (string->list (syntax->datum #'STR)))) + (define cmds (flatten (map (lambda (x) (list x 0)) chars))) + #`(list op_bra 1 #,@cmds op_ket 1)) + +(define-macro (ff-bra . _) + #'(list op_bra 1)) + +(define-macro (ff-ket . _) + #'(list op_ket 1)) + +(provide ff-program ff-marker ff-push ff-call ff-string ff-string-dq ff-bra ff-ket) diff --git a/racket/private/unescape.rkt b/racket/private/unescape.rkt new file mode 100644 index 0000000..ee61dc8 --- /dev/null +++ b/racket/private/unescape.rkt @@ -0,0 +1,35 @@ +#lang racket/base + +;; Match Go / TypeScript F♭m string escapes for lexer preprocessing. 
+
+(require racket/string) ; string-replace is exported by racket/string, not racket/base
+
+;; Chars are Unicode scalar values here: integer->char rejects surrogates.
+(define (hex->char hex)
+  (define n (string->number hex 16))
+  (cond
+    [(not n) (string-append "[hex error: " hex "]")]
+    [(<= #xD800 n #xDFFF) (string-append "[hex error: surrogate " hex "]")]
+    [(<= n #x10FFFF) (string (integer->char n))] ; astral: one char, no UTF-16 pair
+    [else (string-append "[hex error: out of range]")]))
+
+(define (ff-unescape str)
+  (define s str)
+  (set! s (regexp-replace* #px"\\\\U([0-9a-fA-F]{8})" s
+                           (lambda (_ m) (hex->char m))))
+  (set! s (regexp-replace* #px"\\\\u([0-9a-fA-F]{4})" s
+                           (lambda (_ m) (hex->char m))))
+  (set! s (string-replace s "\\0" "\u0000"))
+  (set! s (string-replace s "\\b" "\b"))
+  (set! s (string-replace s "\\t" "\t"))
+  (set! s (string-replace s "\\n" "\n"))
+  (set! s (string-replace s "\\v" "\v"))
+  (set! s (string-replace s "\\f" "\f"))
+  (set! s (string-replace s "\\r" "\r"))
+  (set! s (string-replace s "\\'" "'"))
+  (set! s (string-replace s "\\\"" "\""))
+  (set! s (string-replace s "\\s" " "))
+  (set! s (string-replace s "\\\\" "\\"))
+  s)
+
+(provide ff-unescape)
From 3e2acdd77d304862c996198a8782923c593b139c Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Tue, 7 Apr 2026 04:26:42 +0000
Subject: [PATCH 6/8] feat(python,ruby,dart): double-quoted strings as [ '...' ] sugar

Expand "..." at tokenize time to '[', Unicode code units as integers, and ']', matching other compilers' BRA/push/KET shape.

- python/execute.py, ruby/execute.rb: tokenizer flat_map expansion
- dart/bin/dart.dart: tokenize() same expansion

Add small verification scripts (python test_double_quote_tokenize.py, ruby test_double_quote_stdout.rb, dart run tool/check_double_quote.dart).

Update string plan implementation list.
Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 2 +- dart/bin/dart.dart | 22 +++++++++++++++++++--- dart/tool/check_double_quote.dart | 22 ++++++++++++++++++++++ python/execute.py | 17 +++++++++++++++-- python/test_double_quote_tokenize.py | 15 +++++++++++++++ ruby/execute.rb | 12 +++++++++++- ruby/test_double_quote_stdout.rb | 11 +++++++++++ 7 files changed, 94 insertions(+), 7 deletions(-) create mode 100644 dart/tool/check_double_quote.dart create mode 100644 python/test_double_quote_tokenize.py create mode 100644 ruby/test_double_quote_stdout.rb diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 92768fb..88adb9c 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -112,7 +112,7 @@ Manual nil-terminated string **values** are still built with `0` and `swons`, fo That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ✅ Implemented** — TypeScript core, **Go** (`go/src/compiler/compiler.go`), and **Racket** (`racket/private/lexer.rkt`, `parser.rkt`, `compiler.rkt`, `runner.rkt`, `expander.rkt`): `"..."` emits `BRA`, the same per-character pushes as `'...'`, then `KET`, matching tokenized `[ '...' ]`. Standalone `[` and `]` tokens are accepted in Go and Racket as well. Racket’s lexer now applies the same **ff-unescape** rules as Go/TS for both `'` and `"` payloads (`racket/private/unescape.rkt`). +**Status: ✅ Implemented** — **TypeScript core** (Node/Bun/Deno/web), **Go** (`go/src/compiler/compiler.go`), **Racket** (lexer/parser/compiler/runner/expander + `racket/private/unescape.rkt`), **Python** (`python/execute.py` tokenizer expands `"..."` to `[`, integer char codes, `]`), **Ruby** (`ruby/execute.rb` same tokenizer expansion), **Dart** (`dart/bin/dart.dart` `tokenize` same expansion before `ev`). 
In TS/Go/Racket the IR is BRA + pushes + KET; in Python/Ruby/Dart the queue receives the equivalent token sequence (these interpreters already treat `[` as opening a quotation and int tokens as pushes). Equivalent spellings and manual cons-chain construction: diff --git a/dart/bin/dart.dart b/dart/bin/dart.dart index 50b2d0b..d60f9ce 100644 --- a/dart/bin/dart.dart +++ b/dart/bin/dart.dart @@ -384,9 +384,25 @@ void callOp(BigInt code) { } List tokenize(String s) { - return s.split(RegExp(r"\s+")) - .where((ss) => ss.trim() != '') - .toList(); + final raw = s + .split(RegExp(r"\s+")) + .where((ss) => ss.trim() != '') + .toList(); + final out = []; + for (final t in raw) { + // Sugar: "..." is [ '...' ] — '[', decimal char codes, ']' + if (t.length > 1 && t.startsWith('"') && t.endsWith('"')) { + out.add('['); + final inner = t.substring(1, t.length - 1); + for (final unit in unescapeQuotedString(inner).codeUnits) { + out.add(unit.toString()); + } + out.add(']'); + } else { + out.add(t); + } + } + return out; } void ev() { diff --git a/dart/tool/check_double_quote.dart b/dart/tool/check_double_quote.dart new file mode 100644 index 0000000..0db8e50 --- /dev/null +++ b/dart/tool/check_double_quote.dart @@ -0,0 +1,22 @@ +// Run: cd dart && dart run tool/check_double_quote.dart +import 'dart:io'; + +import '../bin/dart.dart' as ff; + +void main() { + final hi = ff.tokenize('"hi"'); + if (hi.length != 4 || + hi[0] != '[' || + hi[1] != '104' || + hi[2] != '105' || + hi[3] != ']') { + stderr.writeln('tokenize("hi") expected [,104,105,], got $hi'); + exit(1); + } + final empty = ff.tokenize('""'); + if (empty.length != 2 || empty[0] != '[' || empty[1] != ']') { + stderr.writeln('tokenize("") expected [,], got $empty'); + exit(1); + } + stdout.writeln('ok'); +} diff --git a/python/execute.py b/python/execute.py index 2fab02c..f327b8b 100755 --- a/python/execute.py +++ b/python/execute.py @@ -309,9 +309,22 @@ def token(s): else: return s +def 
expand_double_quoted_string_token(s): + """Sugar: \"...\" is [ '...' ] — expand to '[', char code pushes, ']'.""" + if not (isinstance(s, str) and len(s) > 1 and s.startswith('"') and s.endswith('"')): + return [s] + inner = unescape(s[1:-1]) + return ['['] + [ord(c) for c in inner] + [']'] + def tokenize(text): - a = text.split() - return list(map(token, a)) + out = [] + for part in text.split(): + for piece in expand_double_quoted_string_token(part): + if isinstance(piece, str): + out.append(token(piece)) + else: + out.append(piece) + return out def run(): global queue diff --git a/python/test_double_quote_tokenize.py b/python/test_double_quote_tokenize.py new file mode 100644 index 0000000..7386d8c --- /dev/null +++ b/python/test_double_quote_tokenize.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +"""Quick check: double-quoted strings tokenize like [ '...' ] with int codes.""" +import importlib.util +from pathlib import Path + +root = Path(__file__).resolve().parent +spec = importlib.util.spec_from_file_location("ff_execute", root / "execute.py") +mod = importlib.util.module_from_spec(spec) +assert spec.loader is not None +spec.loader.exec_module(mod) + +assert mod.tokenize('"hi"') == ["[", 104, 105, "]"], mod.tokenize('"hi"') +assert mod.tokenize('""') == ["[", "]"], mod.tokenize('""') +assert mod.tokenize('a "x" b') == ["a", "[", ord("x"), "]", "b"] +print("ok") diff --git a/ruby/execute.rb b/ruby/execute.rb index 244ac2a..ce883c3 100755 --- a/ruby/execute.rb +++ b/ruby/execute.rb @@ -193,9 +193,19 @@ def token (x) return x end +def expand_double_quoted_string_token(s) + # Sugar: "..." is [ '...' 
] — '[', char codes, ']' + return [s] unless s.is_a?(String) && s.length > 1 && s.start_with?('"') && s.end_with?('"') + inner = unescape(s[1..-2]) + ['['] + inner.each_char.map(&:ord) + [']'] +end + def tokenize (str) str = str.dup.force_encoding(Encoding::UTF_8) - return str.gsub(/\s+/m, ' ').strip.split(" ").map { |s| token(s) } + parts = str.gsub(/\s+/m, ' ').strip.split(' ') + parts.flat_map { |s| expand_double_quoted_string_token(s) }.map do |x| + x.is_a?(String) ? token(x) : x + end end defineSystem('nop', lambda {|| diff --git a/ruby/test_double_quote_stdout.rb b/ruby/test_double_quote_stdout.rb new file mode 100644 index 0000000..ee754db --- /dev/null +++ b/ruby/test_double_quote_stdout.rb @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'open3' + +exe = File.expand_path('execute.rb', __dir__) +out, err, status = Open3.capture3('ruby', exe, stdin_data: "\"hi\" eval dup putn swap putn\n") +raise "unexpected stderr: #{err}" unless err.empty? +raise "expected 105104, got #{out.inspect}" unless out == '105104' +raise "exit #{status}" unless status.success? +puts 'ok' From ae67a857dd5f7ab862cb6a7980ba46c8fc394d6a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 04:36:58 +0000 Subject: [PATCH 7/8] refactor(python,ruby): expand double-quoted strings in run(), not tokenizer Keep "..." as a single string token from tokenize(); prepend [ char-codes ] to the queue when run() processes it. Update plan doc and Python self-check. 
Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 2 +- python/execute.py | 20 +++++--------------- python/test_double_quote_tokenize.py | 20 ++++++++++++++++---- ruby/execute.rb | 17 ++++++----------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 88adb9c..7586791 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -112,7 +112,7 @@ Manual nil-terminated string **values** are still built with `0` and `swons`, fo That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ✅ Implemented** — **TypeScript core** (Node/Bun/Deno/web), **Go** (`go/src/compiler/compiler.go`), **Racket** (lexer/parser/compiler/runner/expander + `racket/private/unescape.rkt`), **Python** (`python/execute.py` tokenizer expands `"..."` to `[`, integer char codes, `]`), **Ruby** (`ruby/execute.rb` same tokenizer expansion), **Dart** (`dart/bin/dart.dart` `tokenize` same expansion before `ev`). In TS/Go/Racket the IR is BRA + pushes + KET; in Python/Ruby/Dart the queue receives the equivalent token sequence (these interpreters already treat `[` as opening a quotation and int tokens as pushes). +**Status: ✅ Implemented** — **TypeScript core** (Node/Bun/Deno/web), **Go** (`go/src/compiler/compiler.go`), **Racket** (lexer/parser/compiler/runner/expander + `racket/private/unescape.rkt`), **Python** (`python/execute.py`: `run()` prepends `[`, integer char codes, `]` when it sees a `"..."` token — tokenizer leaves the token intact), **Ruby** (`ruby/execute.rb`: same in `run`), **Dart** (`dart/bin/dart.dart`: expansion in `tokenize()` before `ev`). In TS/Go/Racket the IR is BRA + pushes + KET; Python/Ruby/Dart feed the VM the equivalent queue sequence (`[` … codes … `]`). 
Equivalent spellings and manual cons-chain construction: diff --git a/python/execute.py b/python/execute.py index f327b8b..a6d5586 100755 --- a/python/execute.py +++ b/python/execute.py @@ -309,22 +309,8 @@ def token(s): else: return s -def expand_double_quoted_string_token(s): - """Sugar: \"...\" is [ '...' ] — expand to '[', char code pushes, ']'.""" - if not (isinstance(s, str) and len(s) > 1 and s.startswith('"') and s.endswith('"')): - return [s] - inner = unescape(s[1:-1]) - return ['['] + [ord(c) for c in inner] + [']'] - def tokenize(text): - out = [] - for part in text.split(): - for piece in expand_double_quoted_string_token(part): - if isinstance(piece, str): - out.append(token(piece)) - else: - out.append(piece) - return out + return list(map(token, text.split())) def run(): global queue @@ -334,6 +320,10 @@ def run(): if type(s) == int: stack.append(s) + elif isinstance(s, str) and len(s) > 1 and s.startswith('"') and s.endswith('"'): + # Sugar: "..." is [ '...' ] — prepend '[', char codes, ']' to the queue. + inner = unescape(s[1:-1]) + queue = ['['] + [ord(c) for c in inner] + [']'] + queue elif s.startswith('.') and len(s) > 1: continue elif s.startswith('[') and s.endswith(']'): diff --git a/python/test_double_quote_tokenize.py b/python/test_double_quote_tokenize.py index 7386d8c..fdd7b58 100644 --- a/python/test_double_quote_tokenize.py +++ b/python/test_double_quote_tokenize.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -"""Quick check: double-quoted strings tokenize like [ '...' ] with int codes.""" +"""Double-quoted sugar: tokenizer keeps \"...\" as one token; run() expands to [ '...' 
].""" import importlib.util +import io +import sys from pathlib import Path root = Path(__file__).resolve().parent @@ -9,7 +11,17 @@ assert spec.loader is not None spec.loader.exec_module(mod) -assert mod.tokenize('"hi"') == ["[", 104, 105, "]"], mod.tokenize('"hi"') -assert mod.tokenize('""') == ["[", "]"], mod.tokenize('""') -assert mod.tokenize('a "x" b') == ["a", "[", ord("x"), "]", "b"] +# Tokenizer must not expand double quotes +assert mod.tokenize('"hi"') == ['"hi"'], mod.tokenize('"hi"') +assert mod.tokenize('a "x" b') == ['a', '"x"', 'b'] + +# Runner expands and executes like [ 'hi' ] eval ... +old_stdout = sys.stdout +sys.stdout = buf = io.StringIO() +try: + mod.queue = mod.tokenize('"hi" eval dup putn swap putn') + mod.run() +finally: + sys.stdout = old_stdout +assert buf.getvalue() == "105104", repr(buf.getvalue()) print("ok") diff --git a/ruby/execute.rb b/ruby/execute.rb index ce883c3..bb9238a 100755 --- a/ruby/execute.rb +++ b/ruby/execute.rb @@ -139,6 +139,11 @@ def run () if item.is_a?(Numeric) $stack.push item + elsif item.is_a?(String) && item.length > 1 && item.start_with?('"') && item.end_with?('"') + # Sugar: "..." is [ '...' ] — prepend '[', char codes, ']' + inner = unescape(item[1..-2]) + expanded = ['['] + inner.each_char.map(&:ord) + [']'] + $queue = expanded + $queue elsif item[0] == "." && item.length() > 1 # no-op elsif item[0] == "'" @@ -193,19 +198,9 @@ def token (x) return x end -def expand_double_quoted_string_token(s) - # Sugar: "..." is [ '...' ] — '[', char codes, ']' - return [s] unless s.is_a?(String) && s.length > 1 && s.start_with?('"') && s.end_with?('"') - inner = unescape(s[1..-2]) - ['['] + inner.each_char.map(&:ord) + [']'] -end - def tokenize (str) str = str.dup.force_encoding(Encoding::UTF_8) - parts = str.gsub(/\s+/m, ' ').strip.split(' ') - parts.flat_map { |s| expand_double_quoted_string_token(s) }.map do |x| - x.is_a?(String) ? 
token(x) : x - end + return str.gsub(/\s+/m, ' ').strip.split(' ').map { |s| token(s) } end defineSystem('nop', lambda {|| From 424aabc117418506eb641055faca899296cfb80a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 7 Apr 2026 04:43:37 +0000 Subject: [PATCH 8/8] refactor(dart): expand double quotes in ev(); whitespace-only tokenize Remove string expansion from tokenize(); handle "..." in ev() like Python/Ruby by prepending [ code-unit tokens ] to the queue. Update check script and plan doc. Co-authored-by: Jayson Harshbarger --- _plans/ffm-better-strings.md | 2 +- dart/bin/dart.dart | 33 +++++++++++-------------- dart/tool/check_double_quote.dart | 41 ++++++++++++++++++++++--------- 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/_plans/ffm-better-strings.md b/_plans/ffm-better-strings.md index 7586791..b9053f9 100644 --- a/_plans/ffm-better-strings.md +++ b/_plans/ffm-better-strings.md @@ -112,7 +112,7 @@ Manual nil-terminated string **values** are still built with `0` and `swons`, fo That produces a **single** cons-chain pointer on the stack, not the quote `[ 'hello' ]`. Library words such as `sprint` expect evaluable quote bodies or cons chains per existing conventions. -**Status: ✅ Implemented** — **TypeScript core** (Node/Bun/Deno/web), **Go** (`go/src/compiler/compiler.go`), **Racket** (lexer/parser/compiler/runner/expander + `racket/private/unescape.rkt`), **Python** (`python/execute.py`: `run()` prepends `[`, integer char codes, `]` when it sees a `"..."` token — tokenizer leaves the token intact), **Ruby** (`ruby/execute.rb`: same in `run`), **Dart** (`dart/bin/dart.dart`: expansion in `tokenize()` before `ev`). In TS/Go/Racket the IR is BRA + pushes + KET; Python/Ruby/Dart feed the VM the equivalent queue sequence (`[` … codes … `]`). 
+**Status: ✅ Implemented** — **TypeScript core** (Node/Bun/Deno/web), **Go** (`go/src/compiler/compiler.go`), **Racket** (lexer/parser/compiler/runner/expander + `racket/private/unescape.rkt`), **Python** (`python/execute.py`: `run()` prepends `[`, integer char codes, `]` when it sees a `"..."` token — tokenizer leaves the token intact), **Ruby** (`ruby/execute.rb`: same in `run`), **Dart** (`dart/bin/dart.dart`: `ev()` prepends `[`, UTF-16 code unit strings, `]` when dequeuing a `"..."` token; `tokenize()` splits on whitespace only). In TS/Go/Racket the IR is BRA + pushes + KET; Python/Ruby/Dart feed the VM the equivalent queue sequence (`[` … codes … `]`). Equivalent spellings and manual cons-chain construction: diff --git a/dart/bin/dart.dart b/dart/bin/dart.dart index d60f9ce..6e7106b 100644 --- a/dart/bin/dart.dart +++ b/dart/bin/dart.dart @@ -383,26 +383,12 @@ void callOp(BigInt code) { } } +/// Whitespace-only split (no string or comment awareness). List tokenize(String s) { - final raw = s - .split(RegExp(r"\s+")) - .where((ss) => ss.trim() != '') + return s + .split(RegExp(r'\s+')) + .where((ss) => ss.trim().isNotEmpty) .toList(); - final out = []; - for (final t in raw) { - // Sugar: "..." is [ '...' ] — '[', decimal char codes, ']' - if (t.length > 1 && t.startsWith('"') && t.endsWith('"')) { - out.add('['); - final inner = t.substring(1, t.length - 1); - for (final unit in unescapeQuotedString(inner).codeUnits) { - out.add(unit.toString()); - } - out.add(']'); - } else { - out.add(t); - } - } - return out; } void ev() { @@ -424,6 +410,17 @@ void ev() { var chars = unescapeQuotedString(text.substring(1, end)).split(''); var asc = chars.map((c) => BigInt.from(c.codeUnitAt(0))).toList(); stack.addAll(asc); + } else if (text.length > 1 && + text.startsWith('"') && + text.endsWith('"')) { + // Sugar: "..." is [ '...' 
] — prepend '[', char code tokens, ']' + final inner = text.substring(1, text.length - 1); + final expanded = [ + '[', + ...unescapeQuotedString(inner).codeUnits.map((u) => u.toString()), + ']', + ]; + pushFrontQueueAll(expanded); } else if (symbols[text.toLowerCase()] != null) { var code = getSymbol(text); callOp(code); diff --git a/dart/tool/check_double_quote.dart b/dart/tool/check_double_quote.dart index 0db8e50..6746a67 100644 --- a/dart/tool/check_double_quote.dart +++ b/dart/tool/check_double_quote.dart @@ -1,22 +1,41 @@ // Run: cd dart && dart run tool/check_double_quote.dart +import 'dart:convert'; import 'dart:io'; import '../bin/dart.dart' as ff; -void main() { - final hi = ff.tokenize('"hi"'); - if (hi.length != 4 || - hi[0] != '[' || - hi[1] != '104' || - hi[2] != '105' || - hi[3] != ']') { - stderr.writeln('tokenize("hi") expected [,104,105,], got $hi'); +void main() async { + // Tokenizer: whitespace-only split; "..." stays one token + final hiTok = ff.tokenize('"hi"'); + if (hiTok.length != 1 || hiTok[0] != '"hi"') { + stderr.writeln('tokenize expected one token for "\\"hi\\"", got $hiTok'); exit(1); } - final empty = ff.tokenize('""'); - if (empty.length != 2 || empty[0] != '[' || empty[1] != ']') { - stderr.writeln('tokenize("") expected [,], got $empty'); + final multi = ff.tokenize('a \t "x" \n b'); + if (multi.length != 3 || multi[0] != 'a' || multi[1] != '"x"' || multi[2] != 'b') { + stderr.writeln('whitespace tokenize broken, got $multi'); exit(1); } + + final pkgRoot = File(Platform.script.toFilePath()).parent.parent.path; + final p = await Process.start( + 'dart', + ['run', 'bin/dart.dart'], + workingDirectory: pkgRoot, + ); + p.stdin.add(utf8.encode('"hi" eval dup putn swap putn\n')); + await p.stdin.close(); + final out = (await p.stdout.transform(utf8.decoder).join()).trim(); + final err = (await p.stderr.transform(utf8.decoder).join()).trim(); + final code = await p.exitCode; + if (code != 0) { + stderr.writeln('dart run failed ($code): 
$err'); + exit(1); + } + if (out != '105104') { + stderr.writeln('expected stdout 105104, got ${jsonEncode(out)}'); + exit(1); + } + stdout.writeln('ok'); }