From 22a1563dca5dd03a5db3b6b3bd0d25d36f51e9e8 Mon Sep 17 00:00:00 2001 From: oddessentials Date: Wed, 3 Dec 2025 22:16:09 -0500 Subject: [PATCH 1/3] refactor: add implementation instructions.md --- instructions.md | 197 +++++++++++++++++++++++++++++++++++++++ src/fixture-helpers.ts | 1 + src/index.ts | 1 + src/run-verify.ts | 1 + src/schemas/architect.ts | 1 + src/schemas/coder.ts | 1 + src/schemas/planner.ts | 1 + src/schemas/reviewer.ts | 1 + 8 files changed, 204 insertions(+) create mode 100644 instructions.md diff --git a/instructions.md b/instructions.md new file mode 100644 index 0000000..07cc6b8 --- /dev/null +++ b/instructions.md @@ -0,0 +1,197 @@ +# **Fixture Kit – Team Implementation Guide** + +**How to Build Complete Fixtures for Every Topic and Task** + +This guide explains exactly how to create fixtures under `/fixtures/` for all topics, tasks, and agents described in the master JSON. The goal is a fully populated, schema-valid, deterministic fixture suite. + +Follow this process consistently for every task. + +--- + +# **1. Directory Layout (Authoritative)** + +Each task must follow this structure: + +``` +fixtures/ +└── <topic>/ + └── <task-id>-<slug>/ + ├── architect/ + │ ├── prompt.md + │ ├── expected.json + │ ├── verify.ts + │ └── repo/ # optional + ├── planner/ + │ ├── prompt.md + │ ├── expected.json + │ ├── verify.ts + │ └── repo/ + ├── coder/ + │ ├── prompt.md + │ ├── expected.patch + │ ├── verify.ts + │ └── repo/ + └── reviewer/ + ├── prompt.md + ├── expected.json + ├── verify.ts + └── repo/ +``` + +**Notes:** + +* `<topic>` and `<task-id>` come directly from the master JSON. +* Adding a readable slug (`.../task-001-is-even/`) is recommended. +* `repo/` is included only when a scenario requires source files. + +--- + +# **2. 
The Repeatable 5-Step Process (Do This for Every Task)** + +| Step | Action | Files | Criteria | +| ----- | -------------------------------------- | ------------------------------------------ | -------------------------------------------------- | +| **1** | Read the task description in the JSON | — | Understand what each agent must demonstrate | +| **2** | Create the directory skeleton | `fixtures/<topic>/<task-id>/` + agent folders | IDs match JSON exactly | +| **3** | Write `prompt.md` for each agent | 4 prompts | Contains only what that agent receives as input | +| **4** | Write golden expected output | `expected.json` or `expected.patch` | Must pass Zod schema validation | +| **5** | Write `verify.ts` using shared helpers | 4 verifiers | `npm run verify` produces `OK` for all four agents | + +--- + +# **3. `verify.ts` Template (Use for All Agents)** + +Swap the helper depending on the agent: + +```ts +import type { VerifyCtx, VerifyResult } from "test-fixtures/fixture-helpers"; +import { verifyArchitect } from "test-fixtures/fixture-helpers"; // or verifyPlanner, verifyCoder, verifyReviewer + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (parsed, ctx) => { + // Add scenario-specific semantic checks here. + // Schema validation is handled by the helper. + return { ok: true }; + }); +} +``` + +**You only add semantic rules** specific to that scenario (task count, complexity, allowed file targets, blocking comments, etc.). + +--- + +# **4. Workflow for Contributors** + +Recommended daily process: + +```bash +git pull +git checkout -b fixtures/<topic>/<task-id> + +# create the prompts, expected outputs, verify.ts, and repo/ if required + +npm run verify # fails until all four agents are implemented +npm run verify -- --update # only when confident the goldens are correct + +git add . 
+git commit -m "feat(fixtures): add <topic>/<task-id> full fixture suite" +git push -u origin HEAD +# open PR; CI runs "npm run ci" +``` + +**Important:** +Use `--update` only when intentionally regenerating goldens (e.g., after updating prompts or expected outputs). + +--- + +# **5. Rules to Follow for All Fixtures** + +### General + +* Do not invent APIs, files, fields, or behaviors not present in `repo/`. +* No edits to forbidden paths unless explicitly allowed: + `dist/`, `node_modules/`, `.swarm/`, `build/`, `.git/`, `.env`, sensitive configs. +* Patch must be minimal, targeted, and atomic. +* JSON must be pretty-printed with 2 spaces. +* `expected.patch` must apply cleanly (`git apply --check`). + +### Architect + +* Must conform to `ArchitectSpecSchema`. +* Must reflect the constraints and scope defined in the JSON. + +### Planner + +* Must conform to `plannerOutputSchema`. +* Must respect scenario constraints (maxTasks, complexity caps, cycle avoidance). +* May emit `"unknown"` tasks for ambiguous or infeasible areas as required. + +### Coder + +* Must conform to `coderOutputSchema` (non-empty unified diff). +* Patch must touch only allowed files. +* No speculative changes. + +### Reviewer + +* Must conform to `reviewerOutputSchema`. +* Every comment must include: + + ```json + "blocking": true | false + ``` +* Comments must be grounded in actual patch lines. + +--- + +# **6. Example Outline (Task: `task-001-is-even`)** + +``` +fixtures/zero-change/task-001-is-even/ + architect/prompt.md + architect/expected.json + architect/verify.ts + planner/prompt.md + planner/expected.json + planner/verify.ts + coder/prompt.md + coder/expected.patch + coder/verify.ts + reviewer/prompt.md + reviewer/expected.json + reviewer/verify.ts +``` + +* Architect defines a tiny utility. +* Planner emits 1 low-complexity fix task. +* Coder outputs minimal patch correcting logic. +* Reviewer approves with grounded comments. 
+ +Running: + +```bash +npm run verify +``` + +produces: + +``` +zero-change/task-001-is-even/architect OK +zero-change/task-001-is-even/planner OK +zero-change/task-001-is-even/coder OK +zero-change/task-001-is-even/reviewer OK +``` + +--- + +# **7. Completion Criteria** + +A full run of: + +```bash +npm run verify +``` + +should produce one `OK` line for every `(topic × task × agent)` combination. + +When all are green, the fixture suite fully covers the entire JSON roadmap with deterministic, schema-valid, scenario-correct goldens. + diff --git a/src/fixture-helpers.ts b/src/fixture-helpers.ts index 046e594..329c524 100644 --- a/src/fixture-helpers.ts +++ b/src/fixture-helpers.ts @@ -1,3 +1,4 @@ +// src/fixture-helpers.ts import { ArchitectSpecSchema, type ArchitectSpec } from "./schemas/architect"; import { plannerOutputSchema, type PlannerOutput } from "./schemas/planner"; import { coderOutputSchema, type CoderOutput } from "./schemas/coder"; diff --git a/src/index.ts b/src/index.ts index 1df8229..346f1f9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,4 @@ +// src/index.ts export * from "./schemas/architect"; export * from "./schemas/planner"; export * from "./schemas/coder"; diff --git a/src/run-verify.ts b/src/run-verify.ts index 675527d..783be5a 100644 --- a/src/run-verify.ts +++ b/src/run-verify.ts @@ -1,3 +1,4 @@ +// src/run-verify.ts import * as path from "path"; import * as fs from "fs"; import { fileURLToPath } from "url"; diff --git a/src/schemas/architect.ts b/src/schemas/architect.ts index 5f4b2f7..623fa89 100644 --- a/src/schemas/architect.ts +++ b/src/schemas/architect.ts @@ -1,3 +1,4 @@ +// src/schemas/architect.ts import { z } from "zod"; export const ArchitectApiSchema = z.object({ diff --git a/src/schemas/coder.ts b/src/schemas/coder.ts index 6c5dfac..8ab89fd 100644 --- a/src/schemas/coder.ts +++ b/src/schemas/coder.ts @@ -1,3 +1,4 @@ +// src/schemas/coder.ts import { z } from "zod"; /** diff --git a/src/schemas/planner.ts 
b/src/schemas/planner.ts index a99257e..8f9bad9 100644 --- a/src/schemas/planner.ts +++ b/src/schemas/planner.ts @@ -1,3 +1,4 @@ +// src/schemas/planner.ts import { z } from "zod"; export const plannerTaskSchema = z.object({ diff --git a/src/schemas/reviewer.ts b/src/schemas/reviewer.ts index 01fc76e..5dfa518 100644 --- a/src/schemas/reviewer.ts +++ b/src/schemas/reviewer.ts @@ -1,3 +1,4 @@ +// src/schemas/reviewer.ts import { z } from "zod"; export const reviewerCommentSchema = z.object({ From 9750b9dd19a28ba62bdbafd74f2ff6aa56a85ae3 Mon Sep 17 00:00:00 2001 From: oddessentials Date: Wed, 3 Dec 2025 22:18:21 -0500 Subject: [PATCH 2/3] refactor: add note of example fixture --- instructions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/instructions.md b/instructions.md index 07cc6b8..e0d05ac 100644 --- a/instructions.md +++ b/instructions.md @@ -43,6 +43,7 @@ fixtures/ * `<topic>` and `<task-id>` come directly from the master JSON. * Adding a readable slug (`.../task-001-is-even/`) is recommended. * `repo/` is included only when a scenario requires source files. +* `fixtures/zero-change/task-001-is-even/*` is already completed and serves as a template for new tasks. 
--- From 7114c344ab3c0c083eb392163d0b7163d2a9e3da Mon Sep 17 00:00:00 2001 From: oddessentials Date: Wed, 3 Dec 2025 23:01:02 -0500 Subject: [PATCH 3/3] feat: add first two topics of fixtures --- .../architect/expected.json | 18 +++++++ .../architect/prompt.md | 13 +++++ .../architect/verify.ts | 28 ++++++++++ .../coder/expected.patch | 10 ++++ .../coder/prompt.md | 13 +++++ .../coder/verify.ts | 36 +++++++++++++ .../planner/expected.json | 14 +++++ .../planner/prompt.md | 3 ++ .../planner/verify.ts | 53 +++++++++++++++++++ .../reviewer/expected.json | 11 ++++ .../reviewer/prompt.md | 15 ++++++ .../reviewer/verify.ts | 47 ++++++++++++++++ .../architect/expected.json | 7 +++ .../architect/prompt.md | 13 +++++ .../architect/verify.ts | 27 ++++++++++ .../coder/expected.patch | 5 ++ .../coder/prompt.md | 5 ++ .../coder/verify.ts | 21 ++++++++ .../planner/expected.json | 17 ++++++ .../planner/prompt.md | 5 ++ .../planner/verify.ts | 22 ++++++++ .../reviewer/expected.json | 9 ++++ .../reviewer/prompt.md | 11 ++++ .../reviewer/verify.ts | 28 ++++++++++ .../architect/expected.json | 18 +++++++ .../architect/prompt.md | 13 +++++ .../architect/repo/src/server.ts | 18 +++++++ .../architect/verify.ts | 35 ++++++++++++ .../coder/expected.patch | 32 +++++++++++ .../task-002-auth-or-health/coder/prompt.md | 7 +++ .../coder/repo/src/server.ts | 18 +++++++ .../task-002-auth-or-health/coder/verify.ts | 31 +++++++++++ .../planner/expected.json | 22 ++++++++ .../task-002-auth-or-health/planner/prompt.md | 15 ++++++ .../planner/repo/src/server.ts | 18 +++++++ .../task-002-auth-or-health/planner/verify.ts | 26 +++++++++ .../reviewer/expected.json | 11 ++++ .../reviewer/prompt.md | 37 +++++++++++++ .../reviewer/repo/src/server.ts | 18 +++++++ .../reviewer/verify.ts | 32 +++++++++++ .../architect/expected.json | 17 ++++++ .../task-005-tiny-health/architect/prompt.md | 13 +++++ .../architect/repo/app.js | 6 +++ .../task-005-tiny-health/architect/verify.ts | 25 +++++++++ 
.../task-005-tiny-health/coder/expected.patch | 11 ++++ .../task-005-tiny-health/coder/prompt.md | 1 + .../task-005-tiny-health/coder/repo/app.js | 6 +++ .../task-005-tiny-health/coder/verify.ts | 26 +++++++++ .../planner/expected.json | 14 +++++ .../task-005-tiny-health/planner/prompt.md | 6 +++ .../task-005-tiny-health/planner/repo/app.js | 6 +++ .../task-005-tiny-health/planner/verify.ts | 25 +++++++++ .../reviewer/expected.json | 11 ++++ .../task-005-tiny-health/reviewer/prompt.md | 15 ++++++ .../task-005-tiny-health/reviewer/repo/app.js | 6 +++ .../task-005-tiny-health/reviewer/verify.ts | 27 ++++++++++ .../task-010-no-op/architect/expected.json | 7 +++ .../task-010-no-op/architect/prompt.md | 17 ++++++ .../task-010-no-op/architect/repo/utils.js | 5 ++ .../task-010-no-op/architect/verify.ts | 27 ++++++++++ .../task-010-no-op/coder/expected.patch | 5 ++ .../task-010-no-op/coder/prompt.md | 3 ++ .../task-010-no-op/coder/verify.ts | 21 ++++++++ .../task-010-no-op/planner/expected.json | 14 +++++ .../task-010-no-op/planner/prompt.md | 3 ++ .../task-010-no-op/planner/verify.ts | 26 +++++++++ .../task-010-no-op/reviewer/expected.json | 9 ++++ .../task-010-no-op/reviewer/prompt.md | 11 ++++ .../task-010-no-op/reviewer/verify.ts | 35 ++++++++++++ 69 files changed, 1180 insertions(+) create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/architect/expected.json create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/architect/prompt.md create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/architect/verify.ts create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/coder/expected.patch create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/coder/prompt.md create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/coder/verify.ts create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/planner/expected.json create mode 100644 
fixtures/single-file/task-101-single-file-low-complexity/planner/prompt.md create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/planner/verify.ts create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/reviewer/expected.json create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/reviewer/prompt.md create mode 100644 fixtures/single-file/task-101-single-file-low-complexity/reviewer/verify.ts create mode 100644 fixtures/zero-change/task-000-impossible-requirements/architect/expected.json create mode 100644 fixtures/zero-change/task-000-impossible-requirements/architect/prompt.md create mode 100644 fixtures/zero-change/task-000-impossible-requirements/architect/verify.ts create mode 100644 fixtures/zero-change/task-000-impossible-requirements/coder/expected.patch create mode 100644 fixtures/zero-change/task-000-impossible-requirements/coder/prompt.md create mode 100644 fixtures/zero-change/task-000-impossible-requirements/coder/verify.ts create mode 100644 fixtures/zero-change/task-000-impossible-requirements/planner/expected.json create mode 100644 fixtures/zero-change/task-000-impossible-requirements/planner/prompt.md create mode 100644 fixtures/zero-change/task-000-impossible-requirements/planner/verify.ts create mode 100644 fixtures/zero-change/task-000-impossible-requirements/reviewer/expected.json create mode 100644 fixtures/zero-change/task-000-impossible-requirements/reviewer/prompt.md create mode 100644 fixtures/zero-change/task-000-impossible-requirements/reviewer/verify.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/architect/expected.json create mode 100644 fixtures/zero-change/task-002-auth-or-health/architect/prompt.md create mode 100644 fixtures/zero-change/task-002-auth-or-health/architect/repo/src/server.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/architect/verify.ts create mode 100644 
fixtures/zero-change/task-002-auth-or-health/coder/expected.patch create mode 100644 fixtures/zero-change/task-002-auth-or-health/coder/prompt.md create mode 100644 fixtures/zero-change/task-002-auth-or-health/coder/repo/src/server.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/coder/verify.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/planner/expected.json create mode 100644 fixtures/zero-change/task-002-auth-or-health/planner/prompt.md create mode 100644 fixtures/zero-change/task-002-auth-or-health/planner/repo/src/server.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/planner/verify.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/reviewer/expected.json create mode 100644 fixtures/zero-change/task-002-auth-or-health/reviewer/prompt.md create mode 100644 fixtures/zero-change/task-002-auth-or-health/reviewer/repo/src/server.ts create mode 100644 fixtures/zero-change/task-002-auth-or-health/reviewer/verify.ts create mode 100644 fixtures/zero-change/task-005-tiny-health/architect/expected.json create mode 100644 fixtures/zero-change/task-005-tiny-health/architect/prompt.md create mode 100644 fixtures/zero-change/task-005-tiny-health/architect/repo/app.js create mode 100644 fixtures/zero-change/task-005-tiny-health/architect/verify.ts create mode 100644 fixtures/zero-change/task-005-tiny-health/coder/expected.patch create mode 100644 fixtures/zero-change/task-005-tiny-health/coder/prompt.md create mode 100644 fixtures/zero-change/task-005-tiny-health/coder/repo/app.js create mode 100644 fixtures/zero-change/task-005-tiny-health/coder/verify.ts create mode 100644 fixtures/zero-change/task-005-tiny-health/planner/expected.json create mode 100644 fixtures/zero-change/task-005-tiny-health/planner/prompt.md create mode 100644 fixtures/zero-change/task-005-tiny-health/planner/repo/app.js create mode 100644 fixtures/zero-change/task-005-tiny-health/planner/verify.ts create mode 100644 
fixtures/zero-change/task-005-tiny-health/reviewer/expected.json create mode 100644 fixtures/zero-change/task-005-tiny-health/reviewer/prompt.md create mode 100644 fixtures/zero-change/task-005-tiny-health/reviewer/repo/app.js create mode 100644 fixtures/zero-change/task-005-tiny-health/reviewer/verify.ts create mode 100644 fixtures/zero-change/task-010-no-op/architect/expected.json create mode 100644 fixtures/zero-change/task-010-no-op/architect/prompt.md create mode 100644 fixtures/zero-change/task-010-no-op/architect/repo/utils.js create mode 100644 fixtures/zero-change/task-010-no-op/architect/verify.ts create mode 100644 fixtures/zero-change/task-010-no-op/coder/expected.patch create mode 100644 fixtures/zero-change/task-010-no-op/coder/prompt.md create mode 100644 fixtures/zero-change/task-010-no-op/coder/verify.ts create mode 100644 fixtures/zero-change/task-010-no-op/planner/expected.json create mode 100644 fixtures/zero-change/task-010-no-op/planner/prompt.md create mode 100644 fixtures/zero-change/task-010-no-op/planner/verify.ts create mode 100644 fixtures/zero-change/task-010-no-op/reviewer/expected.json create mode 100644 fixtures/zero-change/task-010-no-op/reviewer/prompt.md create mode 100644 fixtures/zero-change/task-010-no-op/reviewer/verify.ts diff --git a/fixtures/single-file/task-101-single-file-low-complexity/architect/expected.json b/fixtures/single-file/task-101-single-file-low-complexity/architect/expected.json new file mode 100644 index 0000000..dcc03b5 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/architect/expected.json @@ -0,0 +1,18 @@ +{ + "highLevelSummary": "Define and clarify a tiny capitalize(str: string): string utility with strict non-goals: do not change any other functions or behavior in the system.", + "pagesOrScreens": [], + "apis": [], + "dataModels": [ + { + "name": "StringUtils", + "fields": ["value"], + "primaryKey": null, + "indexes": null, + "relations": null + } + ], + 
"recommendedFileStructure": [ + "src/utils/capitalize.ts", + "test/utils/capitalize.test.ts" + ] +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/architect/prompt.md b/fixtures/single-file/task-101-single-file-low-complexity/architect/prompt.md new file mode 100644 index 0000000..3bf6070 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/architect/prompt.md @@ -0,0 +1,13 @@ +You are designing a tiny spec for a utility function `capitalize(str: string): string`. + +Requirements: + +- `capitalize("hello")` must return `"Hello"` (first letter uppercase, rest lowercase). +- `capitalize("WORLD")` must return `"World"`. +- `capitalize("")` must return `""` (empty string unchanged). + +Non-goals: + +- Do **not** change any other utilities. +- Do **not** introduce new features or APIs. +- Only clarify the behavior of `capitalize`. diff --git a/fixtures/single-file/task-101-single-file-low-complexity/architect/verify.ts b/fixtures/single-file/task-101-single-file-low-complexity/architect/verify.ts new file mode 100644 index 0000000..309e162 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/architect/verify.ts @@ -0,0 +1,28 @@ +import { + verifyArchitect, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (spec) => { + const summary = spec.highLevelSummary.toLowerCase(); + + if (!summary.includes("capitalize")) { + return { + ok: false, + reason: "highLevelSummary must mention capitalize", + }; + } + + if (!summary.includes("do not change any other functions")) { + return { + ok: false, + reason: + "highLevelSummary must state non-goal: do not change other functions", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/coder/expected.patch b/fixtures/single-file/task-101-single-file-low-complexity/coder/expected.patch new file mode 
100644 index 0000000..17b41f8 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/coder/expected.patch @@ -0,0 +1,10 @@ +diff --git a/src/utils/capitalize.ts b/src/utils/capitalize.ts +index 0000000..0000001 100644 +--- a/src/utils/capitalize.ts ++++ b/src/utils/capitalize.ts +@@ -1,3 +1,4 @@ + export function capitalize(str: string): string { +- return str.toUpperCase(); ++ if (str.length === 0) return str; ++ return str.charAt(0).toUpperCase() + str.slice(1).toLowerCase(); + } diff --git a/fixtures/single-file/task-101-single-file-low-complexity/coder/prompt.md b/fixtures/single-file/task-101-single-file-low-complexity/coder/prompt.md new file mode 100644 index 0000000..dcaee48 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/coder/prompt.md @@ -0,0 +1,13 @@ +You are the coder agent. Apply the architect and planner guidance to fix the +`capitalize(str: string): string` function. + +Requirements: + +- Correct the implementation in `src/utils/capitalize.ts` so that: + - `capitalize("hello")` returns `"Hello"` + - `capitalize("WORLD")` returns `"World"` + - `capitalize("")` returns `""` +- Do **not** touch any other files. +- Keep the change as small and focused as possible. + +Return a single unified diff patch that updates `src/utils/capitalize.ts` only. 
diff --git a/fixtures/single-file/task-101-single-file-low-complexity/coder/verify.ts b/fixtures/single-file/task-101-single-file-low-complexity/coder/verify.ts new file mode 100644 index 0000000..5665422 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/coder/verify.ts @@ -0,0 +1,36 @@ +import { + verifyCoder, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyCoder(ctx, (patch) => { + // Must mention the target file + if (!patch.includes("src/utils/capitalize.ts")) { + return { + ok: false, + reason: "Patch must target src/utils/capitalize.ts.", + }; + } + + // Must contain proper capitalization logic + if (!patch.includes("toUpperCase()") || !patch.includes("toLowerCase()")) { + return { + ok: false, + reason: "Patch must implement capitalize using toUpperCase and toLowerCase.", + }; + } + + // Very lightweight sanity: only one diff header + const headerCount = (patch.match(/^diff --git /gm) || []).length; + if (headerCount !== 1) { + return { + ok: false, + reason: "Patch must contain exactly one diff header.", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/planner/expected.json b/fixtures/single-file/task-101-single-file-low-complexity/planner/expected.json new file mode 100644 index 0000000..3479e1f --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/planner/expected.json @@ -0,0 +1,14 @@ +{ + "tasks": [ + { + "id": "fix-capitalize-logic", + "description": "Fix the capitalize(str: string) implementation in src/utils/capitalize.ts so that it capitalizes the first letter and lowercases the rest.", + "file": "src/utils/capitalize.ts", + "type": "fix", + "complexity": "low", + "dependsOn": [] + } + ], + "ambiguities": [], + "invalidTaskCount": 0 +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/planner/prompt.md 
b/fixtures/single-file/task-101-single-file-low-complexity/planner/prompt.md new file mode 100644 index 0000000..f761f78 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/planner/prompt.md @@ -0,0 +1,3 @@ +The `capitalize` utility is incorrectly implemented. Using the architect's spec for this tiny +utility, produce **one low-complexity task** that fixes the `capitalize` function in +`src/utils/capitalize.ts` and nothing else. diff --git a/fixtures/single-file/task-101-single-file-low-complexity/planner/verify.ts b/fixtures/single-file/task-101-single-file-low-complexity/planner/verify.ts new file mode 100644 index 0000000..4f0f0e7 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/planner/verify.ts @@ -0,0 +1,53 @@ +import { + verifyPlanner, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyPlanner(ctx, (out) => { + if (out.tasks.length !== 1) { + return { + ok: false, + reason: "Planner must emit exactly one task for single-file low-complexity scenario.", + }; + } + + const task = out.tasks[0]; + + if (task.type !== "fix") { + return { ok: false, reason: 'Task type must be "fix".' }; + } + + if (task.complexity !== "low") { + return { ok: false, reason: 'Task complexity must be "low".' }; + } + + if (task.file !== "src/utils/capitalize.ts") { + return { ok: false, reason: "Task must target src/utils/capitalize.ts." 
}; + } + + if (task.dependsOn.length !== 0) { + return { + ok: false, + reason: "dependsOn must be empty for this trivial single-step fix.", + }; + } + + if (out.ambiguities.length !== 0) { + return { + ok: false, + reason: "ambiguities must be empty for a trivial capitalize fix.", + }; + } + + if (out.invalidTaskCount !== 0) { + return { + ok: false, + reason: "invalidTaskCount must be 0.", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/reviewer/expected.json b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/expected.json new file mode 100644 index 0000000..fa3a1b1 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/expected.json @@ -0,0 +1,11 @@ +{ + "decision": "approve", + "comments": [ + { + "message": "Change correctly implements capitalize using charAt(0).toUpperCase() and slice(1).toLowerCase(); consider adding edge case tests for empty strings and single-character inputs if not already covered.", + "path": "test/utils/capitalize.test.ts", + "line": 1, + "blocking": false + } + ] +} diff --git a/fixtures/single-file/task-101-single-file-low-complexity/reviewer/prompt.md b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/prompt.md new file mode 100644 index 0000000..18e54b9 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/prompt.md @@ -0,0 +1,15 @@ +You are the reviewer agent. + +Review a tiny patch that fixes the `capitalize(str: string): string` function in +`src/utils/capitalize.ts`. The change is correct and minimal. + +Your responsibilities: + +- Approve the change or request only minor, non-blocking nits. +- Keep comments grounded in the changed behavior and nearby lines. +- Use structured comments with a `blocking: boolean` field for each issue. 
+ +Return: + +- `decision`: `"approve" | "revise" | "reject"` +- `comments[]`: structured comments with `blocking: true | false` diff --git a/fixtures/single-file/task-101-single-file-low-complexity/reviewer/verify.ts b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/verify.ts new file mode 100644 index 0000000..1fef0c0 --- /dev/null +++ b/fixtures/single-file/task-101-single-file-low-complexity/reviewer/verify.ts @@ -0,0 +1,47 @@ +import { + verifyReviewer, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyReviewer(ctx, (out) => { + if (out.decision !== "approve") { + return { + ok: false, + reason: + "For the single-file low-complexity scenario, the reviewer should approve with at most minor nits.", + }; + } + + if (out.comments.length === 0) { + return { + ok: false, + reason: + "Reviewer should include at least one (non-blocking) comment to exercise the comments schema.", + }; + } + + const blocking = out.comments.filter((c) => c.blocking); + if (blocking.length > 0) { + return { + ok: false, + reason: + "All reviewer comments for this trivial fix must be non-blocking (blocking: false).", + }; + } + + const mentionsCapitalize = out.comments.some((c) => + c.message.toLowerCase().includes("capitalize") + ); + if (!mentionsCapitalize) { + return { + ok: false, + reason: + "At least one comment should reference capitalize to stay grounded in the changed behavior.", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/architect/expected.json b/fixtures/zero-change/task-000-impossible-requirements/architect/expected.json new file mode 100644 index 0000000..b3e8a0f --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/architect/expected.json @@ -0,0 +1,7 @@ +{ + "highLevelSummary": "The requirements are contradictory: the service must be stateless yet remember sessions across 
restarts, and must have no persistent storage yet store credentials in a database. These constraints are mutually exclusive and cannot be satisfied simultaneously.", + "pagesOrScreens": [], + "apis": [], + "dataModels": [], + "recommendedFileStructure": [] +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/architect/prompt.md b/fixtures/zero-change/task-000-impossible-requirements/architect/prompt.md new file mode 100644 index 0000000..8bf8fb9 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/architect/prompt.md @@ -0,0 +1,13 @@ +You are designing a spec for a new stateless microservice that must handle user authentication. + +Requirements: + +- The service must be completely stateless with no persistent storage. +- The service must remember user sessions across restarts. +- The service must authenticate users without any external dependencies. +- The service must store user credentials securely in a database. + +Non-goals: + +- Do **not** use JWT tokens or session cookies. +- Do **not** rely on external authentication providers. 
diff --git a/fixtures/zero-change/task-000-impossible-requirements/architect/verify.ts b/fixtures/zero-change/task-000-impossible-requirements/architect/verify.ts new file mode 100644 index 0000000..0429736 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/architect/verify.ts @@ -0,0 +1,27 @@ +import { + verifyArchitect, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (spec) => { + const summary = spec.highLevelSummary.toLowerCase(); + + if (!summary.includes("contradict")) { + return { + ok: false, + reason: "highLevelSummary must mention contradictory requirements", + }; + } + + if (spec.recommendedFileStructure.length > 0) { + return { + ok: false, + reason: "recommendedFileStructure should be empty for infeasible scenario", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/coder/expected.patch b/fixtures/zero-change/task-000-impossible-requirements/coder/expected.patch new file mode 100644 index 0000000..a116306 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/coder/expected.patch @@ -0,0 +1,5 @@ +diff --git a/README.md b/README.md +index 0000000..0000000 100644 +--- a/README.md ++++ b/README.md +@@ -0,0 +1,0 @@ diff --git a/fixtures/zero-change/task-000-impossible-requirements/coder/prompt.md b/fixtures/zero-change/task-000-impossible-requirements/coder/prompt.md new file mode 100644 index 0000000..a392232 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/coder/prompt.md @@ -0,0 +1,5 @@ +The planner has determined that the requirements are contradictory and cannot be implemented. + +Task: Cannot proceed - requirements are contradictory and cannot be implemented as specified. + +Please generate a patch to implement this task. 
diff --git a/fixtures/zero-change/task-000-impossible-requirements/coder/verify.ts b/fixtures/zero-change/task-000-impossible-requirements/coder/verify.ts new file mode 100644 index 0000000..ae585f8 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/coder/verify.ts @@ -0,0 +1,21 @@ +import { + verifyCoder, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyCoder(ctx, (patch) => { + const trimmedPatch = patch.trim(); + + // Patch should be effectively empty (no real changes) + if (trimmedPatch.length > 200) { + return { + ok: false, + reason: "Patch should be minimal/empty for impossible requirements", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/planner/expected.json b/fixtures/zero-change/task-000-impossible-requirements/planner/expected.json new file mode 100644 index 0000000..2c0b0f5 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/planner/expected.json @@ -0,0 +1,17 @@ +{ + "tasks": [ + { + "id": "impossible-task", + "description": "Cannot proceed: requirements are contradictory and cannot be implemented as specified.", + "file": "", + "type": "unknown", + "complexity": "high", + "dependsOn": [] + } + ], + "ambiguities": [ + "Requirements specify both stateless design and persistent session storage, which are mutually exclusive.", + "Requirements specify no persistent storage yet require storing credentials in a database." 
+ ], + "invalidTaskCount": 0 +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/planner/prompt.md b/fixtures/zero-change/task-000-impossible-requirements/planner/prompt.md new file mode 100644 index 0000000..9cdc845 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/planner/prompt.md @@ -0,0 +1,5 @@ +The architect has analyzed the requirements and produced the following spec: + +> The requirements are contradictory: the service must be stateless yet remember sessions across restarts, and must have no persistent storage yet store credentials in a database. These constraints are mutually exclusive and cannot be satisfied simultaneously. + +Please create a task decomposition plan for implementing this specification. diff --git a/fixtures/zero-change/task-000-impossible-requirements/planner/verify.ts b/fixtures/zero-change/task-000-impossible-requirements/planner/verify.ts new file mode 100644 index 0000000..667e4d9 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/planner/verify.ts @@ -0,0 +1,22 @@ +import { + verifyPlanner, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyPlanner(ctx, (plan) => { + if (plan.tasks.length === 0) { + return { ok: true }; + } + + if (plan.tasks.length === 1 && plan.tasks[0].type === "unknown") { + return { ok: true }; + } + + return { + ok: false, + reason: "Plan should have zero tasks or a single unknown task for impossible requirements", + }; + }); +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/reviewer/expected.json b/fixtures/zero-change/task-000-impossible-requirements/reviewer/expected.json new file mode 100644 index 0000000..250cb34 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/reviewer/expected.json @@ -0,0 +1,9 @@ +{ + "decision": "approve", + "comments": [ + { + "message": "Correctly identified that the requirements are 
contradictory and cannot be implemented. No changes are appropriate given the impossible constraints (stateless + persistent sessions, no storage + database storage).", + "blocking": false + } + ] +} diff --git a/fixtures/zero-change/task-000-impossible-requirements/reviewer/prompt.md b/fixtures/zero-change/task-000-impossible-requirements/reviewer/prompt.md new file mode 100644 index 0000000..abb19e7 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/reviewer/prompt.md @@ -0,0 +1,11 @@ +Please review the following patch: + +```diff +diff --git a/README.md b/README.md +index 0000000..0000000 100644 +--- a/README.md ++++ b/README.md +@@ -0,0 +1,0 @@ +``` + +Context: The original requirements were contradictory (stateless service that must persist sessions, no storage but must store credentials). The planner correctly identified this as impossible to implement. diff --git a/fixtures/zero-change/task-000-impossible-requirements/reviewer/verify.ts b/fixtures/zero-change/task-000-impossible-requirements/reviewer/verify.ts new file mode 100644 index 0000000..abcfb47 --- /dev/null +++ b/fixtures/zero-change/task-000-impossible-requirements/reviewer/verify.ts @@ -0,0 +1,28 @@ +import { + verifyReviewer, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyReviewer(ctx, (review) => { + if (review.decision === "reject") { + return { + ok: false, + reason: "Should not reject when correctly handling impossible requirements", + }; + } + + // Must have blocking field on all comments + for (const comment of review.comments) { + if (typeof comment.blocking !== "boolean") { + return { + ok: false, + reason: "All comments must have explicit blocking field", + }; + } + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-002-auth-or-health/architect/expected.json b/fixtures/zero-change/task-002-auth-or-health/architect/expected.json new file mode 
100644 index 0000000..f2d4e28 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/architect/expected.json @@ -0,0 +1,18 @@ +{ + "highLevelSummary": "Add a simple GET /health endpoint that returns status ok with no authentication required. Include corresponding test coverage. Do not modify existing business logic or server configuration.", + "pagesOrScreens": [], + "apis": [ + { + "name": "Health Check", + "description": "Returns service health status", + "method": "GET", + "path": "/health", + "authRequired": false + } + ], + "dataModels": [], + "recommendedFileStructure": [ + "src/routes/health.ts", + "test/routes/health.test.ts" + ] +} diff --git a/fixtures/zero-change/task-002-auth-or-health/architect/prompt.md b/fixtures/zero-change/task-002-auth-or-health/architect/prompt.md new file mode 100644 index 0000000..a9ab981 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/architect/prompt.md @@ -0,0 +1,13 @@ +You are designing a spec for a simple web service that needs a `/health` endpoint. + +Requirements: + +- Add a GET `/health` endpoint that returns `{ "status": "ok" }` with 200 status. +- The endpoint should require no authentication. +- Add a corresponding test for the health endpoint. + +Non-goals: + +- Do **not** add any other endpoints or features. +- Do **not** modify existing business logic. +- Do **not** change the server configuration or port. 
diff --git a/fixtures/zero-change/task-002-auth-or-health/architect/repo/src/server.ts b/fixtures/zero-change/task-002-auth-or-health/architect/repo/src/server.ts new file mode 100644 index 0000000..04ea890 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/architect/repo/src/server.ts @@ -0,0 +1,18 @@ +// src/server.ts +import express from 'express'; + +const app = express(); +app.use(express.json()); + +// TODO: Add health endpoint here + +app.get('/api/users', (req, res) => { + res.json({ users: [] }); +}); + +const PORT = process.env.PORT || 3000; +app.listen(PORT, () => { + console.log(`Server running on port ${PORT}`); +}); + +export default app; diff --git a/fixtures/zero-change/task-002-auth-or-health/architect/verify.ts b/fixtures/zero-change/task-002-auth-or-health/architect/verify.ts new file mode 100644 index 0000000..4b76cd6 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/architect/verify.ts @@ -0,0 +1,35 @@ +import { + verifyArchitect, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (spec) => { + if (spec.apis.length === 0) { + return { + ok: false, + reason: "Must define at least one API (health endpoint)", + }; + } + + const healthApi = spec.apis.find((api) => + api.path?.toLowerCase().includes("health") + ); + if (!healthApi) { + return { + ok: false, + reason: "Must define a health endpoint API", + }; + } + + if (spec.recommendedFileStructure.length === 0) { + return { + ok: false, + reason: "Must recommend file structure for implementation", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-002-auth-or-health/coder/expected.patch b/fixtures/zero-change/task-002-auth-or-health/coder/expected.patch new file mode 100644 index 0000000..12116c0 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/coder/expected.patch @@ -0,0 +1,32 @@ +diff --git a/src/server.ts 
b/src/server.ts +index 0000000..1111111 100644 +--- a/src/server.ts ++++ b/src/server.ts +@@ -4,7 +4,9 @@ const app = express(); + app.use(express.json()); + +-// TODO: Add health endpoint here ++app.get('/health', (req, res) => { ++ res.status(200).json({ status: 'ok' }); ++}); + + app.get('/api/users', (req, res) => { + res.json({ users: [] }); +diff --git a/test/routes/health.test.ts b/test/routes/health.test.ts +new file mode 100644 +index 0000000..2222222 +--- /dev/null ++++ b/test/routes/health.test.ts +@@ -0,0 +1,12 @@ ++import request from 'supertest'; ++import app from '../../src/server'; ++ ++describe('GET /health', () => { ++ it('should return status ok', async () => { ++ const response = await request(app).get('/health'); ++ ++ expect(response.status).toBe(200); ++ expect(response.body).toEqual({ status: 'ok' }); ++ }); ++}); ++ diff --git a/fixtures/zero-change/task-002-auth-or-health/coder/prompt.md b/fixtures/zero-change/task-002-auth-or-health/coder/prompt.md new file mode 100644 index 0000000..bdafd7e --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/coder/prompt.md @@ -0,0 +1,7 @@ +Please implement the following tasks: + +1. **add-health-endpoint**: Add GET /health endpoint to src/server.ts that returns { status: 'ok' } with 200 status code and no authentication. + +2. **add-health-test**: Add test for /health endpoint in test/routes/health.test.ts to verify it returns correct status. + +Current repository state: Basic Express server with /api/users endpoint, missing health endpoint. 
diff --git a/fixtures/zero-change/task-002-auth-or-health/coder/repo/src/server.ts b/fixtures/zero-change/task-002-auth-or-health/coder/repo/src/server.ts new file mode 100644 index 0000000..04ea890 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/coder/repo/src/server.ts @@ -0,0 +1,18 @@ +// src/server.ts +import express from 'express'; + +const app = express(); +app.use(express.json()); + +// TODO: Add health endpoint here + +app.get('/api/users', (req, res) => { + res.json({ users: [] }); +}); + +const PORT = process.env.PORT || 3000; +app.listen(PORT, () => { + console.log(`Server running on port ${PORT}`); +}); + +export default app; diff --git a/fixtures/zero-change/task-002-auth-or-health/coder/verify.ts b/fixtures/zero-change/task-002-auth-or-health/coder/verify.ts new file mode 100644 index 0000000..48e3838 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/coder/verify.ts @@ -0,0 +1,31 @@ +import { + verifyCoder, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyCoder(ctx, (patch) => { + const lowerPatch = patch.toLowerCase(); + + if (!lowerPatch.includes("health")) { + return { + ok: false, + reason: "Patch must implement health endpoint", + }; + } + + // Should not touch forbidden paths + const forbiddenPaths = ["dist/", "node_modules/", ".swarm/", "build/"]; + for (const forbidden of forbiddenPaths) { + if (lowerPatch.includes(forbidden)) { + return { + ok: false, + reason: `Patch must not touch forbidden path: ${forbidden}`, + }; + } + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-002-auth-or-health/planner/expected.json b/fixtures/zero-change/task-002-auth-or-health/planner/expected.json new file mode 100644 index 0000000..b916cc8 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/planner/expected.json @@ -0,0 +1,22 @@ +{ + "tasks": [ + { + "id": "add-health-endpoint", + "description": 
"Add GET /health endpoint to src/server.ts that returns { status: 'ok' } with 200 status code and no authentication.", + "file": "src/server.ts", + "type": "feature", + "complexity": "low", + "dependsOn": [] + }, + { + "id": "add-health-test", + "description": "Add test for /health endpoint in test/routes/health.test.ts to verify it returns correct status.", + "file": "test/routes/health.test.ts", + "type": "feature", + "complexity": "low", + "dependsOn": ["add-health-endpoint"] + } + ], + "ambiguities": [], + "invalidTaskCount": 0 +} diff --git a/fixtures/zero-change/task-002-auth-or-health/planner/prompt.md b/fixtures/zero-change/task-002-auth-or-health/planner/prompt.md new file mode 100644 index 0000000..404fda8 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/planner/prompt.md @@ -0,0 +1,15 @@ +The architect has produced the following specification: + +**Summary**: Add a simple GET /health endpoint that returns status ok with no authentication required. Include corresponding test coverage. + +**APIs**: +- Health Check (GET /health) - Returns service health status, no auth required + +**Recommended Files**: +- src/routes/health.ts +- test/routes/health.test.ts + +**Repository Context**: +The service currently has a basic Express server with a `/api/users` endpoint but is missing the health endpoint. + +Please create a task decomposition plan. 
diff --git a/fixtures/zero-change/task-002-auth-or-health/planner/repo/src/server.ts b/fixtures/zero-change/task-002-auth-or-health/planner/repo/src/server.ts new file mode 100644 index 0000000..04ea890 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/planner/repo/src/server.ts @@ -0,0 +1,18 @@ +// src/server.ts +import express from 'express'; + +const app = express(); +app.use(express.json()); + +// TODO: Add health endpoint here + +app.get('/api/users', (req, res) => { + res.json({ users: [] }); +}); + +const PORT = process.env.PORT || 3000; +app.listen(PORT, () => { + console.log(`Server running on port ${PORT}`); +}); + +export default app; diff --git a/fixtures/zero-change/task-002-auth-or-health/planner/verify.ts b/fixtures/zero-change/task-002-auth-or-health/planner/verify.ts new file mode 100644 index 0000000..7af1a13 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/planner/verify.ts @@ -0,0 +1,26 @@ +import { + verifyPlanner, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyPlanner(ctx, (plan) => { + if (plan.tasks.length < 1 || plan.tasks.length > 2) { + return { + ok: false, + reason: "Should have 1-2 tasks (endpoint + optional test)", + }; + } + + const complexities = plan.tasks.map((t) => t.complexity); + if (complexities.some((c) => c === "high")) { + return { + ok: false, + reason: "Health endpoint tasks should be low or medium complexity", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-002-auth-or-health/reviewer/expected.json b/fixtures/zero-change/task-002-auth-or-health/reviewer/expected.json new file mode 100644 index 0000000..b5a3f9c --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/reviewer/expected.json @@ -0,0 +1,11 @@ +{ + "decision": "approve", + "comments": [ + { + "message": "Health endpoint correctly implements the spec: returns { status: 'ok' } with 200 status 
and no authentication. Test coverage is appropriate.", + "path": "src/server.ts", + "line": 7, + "blocking": false + } + ] +} diff --git a/fixtures/zero-change/task-002-auth-or-health/reviewer/prompt.md b/fixtures/zero-change/task-002-auth-or-health/reviewer/prompt.md new file mode 100644 index 0000000..31bad1e --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/reviewer/prompt.md @@ -0,0 +1,37 @@ +Please review the following patch: + +```diff +diff --git a/src/server.ts b/src/server.ts +index 0000000..1111111 100644 +--- a/src/server.ts ++++ b/src/server.ts +@@ -4,7 +4,9 @@ const app = express(); + app.use(express.json()); + +-// TODO: Add health endpoint here ++app.get('/health', (req, res) => { ++ res.status(200).json({ status: 'ok' }); ++}); + + app.get('/api/users', (req, res) => { + res.json({ users: [] }); +diff --git a/test/routes/health.test.ts b/test/routes/health.test.ts +new file mode 100644 +index 0000000..2222222 +--- /dev/null ++++ b/test/routes/health.test.ts +@@ -0,0 +1,12 @@ ++import request from 'supertest'; ++import app from '../../src/server'; ++ ++describe('GET /health', () => { ++ it('should return status ok', async () => { ++ const response = await request(app).get('/health'); ++ ++ expect(response.status).toBe(200); ++ expect(response.body).toEqual({ status: 'ok' }); ++ }); ++}); +``` + +Context: Adding a simple health check endpoint as specified in the requirements. 
diff --git a/fixtures/zero-change/task-002-auth-or-health/reviewer/repo/src/server.ts b/fixtures/zero-change/task-002-auth-or-health/reviewer/repo/src/server.ts new file mode 100644 index 0000000..04ea890 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/reviewer/repo/src/server.ts @@ -0,0 +1,18 @@ +// src/server.ts +import express from 'express'; + +const app = express(); +app.use(express.json()); + +// TODO: Add health endpoint here + +app.get('/api/users', (req, res) => { + res.json({ users: [] }); +}); + +const PORT = process.env.PORT || 3000; +app.listen(PORT, () => { + console.log(`Server running on port ${PORT}`); +}); + +export default app; diff --git a/fixtures/zero-change/task-002-auth-or-health/reviewer/verify.ts b/fixtures/zero-change/task-002-auth-or-health/reviewer/verify.ts new file mode 100644 index 0000000..b16cf10 --- /dev/null +++ b/fixtures/zero-change/task-002-auth-or-health/reviewer/verify.ts @@ -0,0 +1,32 @@ +import { + verifyReviewer, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyReviewer(ctx, (review) => { + // Must have blocking field on all comments + for (const comment of review.comments) { + if (typeof comment.blocking !== "boolean") { + return { + ok: false, + reason: "All comments must have explicit blocking field", + }; + } + } + + // Comments should be grounded in actual changes + const hasGroundedComment = review.comments.some( + (c) => c.path && c.path.includes("server.ts") + ); + if (review.comments.length > 0 && !hasGroundedComment) { + return { + ok: false, + reason: "Comments should reference actual changed files", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-005-tiny-health/architect/expected.json b/fixtures/zero-change/task-005-tiny-health/architect/expected.json new file mode 100644 index 0000000..8178904 --- /dev/null +++ 
b/fixtures/zero-change/task-005-tiny-health/architect/expected.json @@ -0,0 +1,17 @@ +{ + "highLevelSummary": "Add minimal GET /health endpoint returning { ok: true } with no auth. No tests, no logging, absolute minimum implementation.", + "pagesOrScreens": [], + "apis": [ + { + "name": "Health", + "description": "Minimal health check", + "method": "GET", + "path": "/health", + "authRequired": false + } + ], + "dataModels": [], + "recommendedFileStructure": [ + "app.js" + ] +} diff --git a/fixtures/zero-change/task-005-tiny-health/architect/prompt.md b/fixtures/zero-change/task-005-tiny-health/architect/prompt.md new file mode 100644 index 0000000..fcde6c0 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/architect/prompt.md @@ -0,0 +1,13 @@ +Add the absolute minimal health check endpoint to this tiny service. + +Requirements: + +- Add GET `/health` that returns `{ "ok": true }` +- No authentication needed +- As simple as possible + +Non-goals: + +- Do **not** add logging or monitoring +- Do **not** add tests (this is a smoke test variant) +- Keep it absolutely minimal diff --git a/fixtures/zero-change/task-005-tiny-health/architect/repo/app.js b/fixtures/zero-change/task-005-tiny-health/architect/repo/app.js new file mode 100644 index 0000000..b355cff --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/architect/repo/app.js @@ -0,0 +1,6 @@ +const express = require('express'); +const app = express(); + +app.get('/', (req, res) => res.send('Hello')); + +app.listen(3000); diff --git a/fixtures/zero-change/task-005-tiny-health/architect/verify.ts b/fixtures/zero-change/task-005-tiny-health/architect/verify.ts new file mode 100644 index 0000000..b7fad34 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/architect/verify.ts @@ -0,0 +1,25 @@ +import { + verifyArchitect, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (spec) => { 
+ if (spec.apis.length !== 1) { + return { + ok: false, + reason: "Should define exactly one API for minimal scenario", + }; + } + + if (spec.recommendedFileStructure.length > 2) { + return { + ok: false, + reason: "File structure should be minimal (1-2 files max)", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-005-tiny-health/coder/expected.patch b/fixtures/zero-change/task-005-tiny-health/coder/expected.patch new file mode 100644 index 0000000..bc8c4c5 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/coder/expected.patch @@ -0,0 +1,11 @@ +diff --git a/app.js b/app.js +index 0000000..1111111 100644 +--- a/app.js ++++ b/app.js +@@ -2,4 +2,6 @@ const express = require('express'); + const app = express(); + + app.get('/', (req, res) => res.send('Hello')); ++app.get('/health', (req, res) => res.json({ ok: true })); + + app.listen(3000); diff --git a/fixtures/zero-change/task-005-tiny-health/coder/prompt.md b/fixtures/zero-change/task-005-tiny-health/coder/prompt.md new file mode 100644 index 0000000..99be3c8 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/coder/prompt.md @@ -0,0 +1 @@ +Task: Add GET /health endpoint to app.js returning { ok: true } diff --git a/fixtures/zero-change/task-005-tiny-health/coder/repo/app.js b/fixtures/zero-change/task-005-tiny-health/coder/repo/app.js new file mode 100644 index 0000000..b355cff --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/coder/repo/app.js @@ -0,0 +1,6 @@ +const express = require('express'); +const app = express(); + +app.get('/', (req, res) => res.send('Hello')); + +app.listen(3000); diff --git a/fixtures/zero-change/task-005-tiny-health/coder/verify.ts b/fixtures/zero-change/task-005-tiny-health/coder/verify.ts new file mode 100644 index 0000000..8520767 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/coder/verify.ts @@ -0,0 +1,26 @@ +import { + verifyCoder, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; 
+ +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyCoder(ctx, (patch) => { + // Should be very small patch + if (patch.length > 300) { + return { + ok: false, + reason: "Patch should be minimal for tiny health endpoint", + }; + } + + if (!patch.includes("health")) { + return { + ok: false, + reason: "Patch must add health endpoint", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-005-tiny-health/planner/expected.json b/fixtures/zero-change/task-005-tiny-health/planner/expected.json new file mode 100644 index 0000000..87083f7 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/planner/expected.json @@ -0,0 +1,14 @@ +{ + "tasks": [ + { + "id": "add-health", + "description": "Add GET /health endpoint to app.js returning { ok: true }", + "file": "app.js", + "type": "feature", + "complexity": "low", + "dependsOn": [] + } + ], + "ambiguities": [], + "invalidTaskCount": 0 +} diff --git a/fixtures/zero-change/task-005-tiny-health/planner/prompt.md b/fixtures/zero-change/task-005-tiny-health/planner/prompt.md new file mode 100644 index 0000000..88eeca2 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/planner/prompt.md @@ -0,0 +1,6 @@ +Spec: Add minimal GET /health endpoint returning { ok: true } with no auth. + +**API**: Health (GET /health) +**Files**: app.js + +Create a minimal task plan. 
diff --git a/fixtures/zero-change/task-005-tiny-health/planner/repo/app.js b/fixtures/zero-change/task-005-tiny-health/planner/repo/app.js new file mode 100644 index 0000000..b355cff --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/planner/repo/app.js @@ -0,0 +1,6 @@ +const express = require('express'); +const app = express(); + +app.get('/', (req, res) => res.send('Hello')); + +app.listen(3000); diff --git a/fixtures/zero-change/task-005-tiny-health/planner/verify.ts b/fixtures/zero-change/task-005-tiny-health/planner/verify.ts new file mode 100644 index 0000000..7a9515e --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/planner/verify.ts @@ -0,0 +1,25 @@ +import { + verifyPlanner, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyPlanner(ctx, (plan) => { + if (plan.tasks.length !== 1) { + return { + ok: false, + reason: "Should have exactly one task for minimal scenario", + }; + } + + if (plan.tasks[0].complexity !== "low") { + return { + ok: false, + reason: "Minimal health endpoint should be low complexity", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-005-tiny-health/reviewer/expected.json b/fixtures/zero-change/task-005-tiny-health/reviewer/expected.json new file mode 100644 index 0000000..fc6449d --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/reviewer/expected.json @@ -0,0 +1,11 @@ +{ + "decision": "approve", + "comments": [ + { + "message": "Minimal health endpoint correctly implemented as specified.", + "path": "app.js", + "line": 5, + "blocking": false + } + ] +} diff --git a/fixtures/zero-change/task-005-tiny-health/reviewer/prompt.md b/fixtures/zero-change/task-005-tiny-health/reviewer/prompt.md new file mode 100644 index 0000000..3e18415 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/reviewer/prompt.md @@ -0,0 +1,15 @@ +Review this minimal patch: + +```diff +diff --git 
a/app.js b/app.js +index 0000000..1111111 100644 +--- a/app.js ++++ b/app.js +@@ -2,4 +2,6 @@ const express = require('express'); + const app = express(); + + app.get('/', (req, res) => res.send('Hello')); ++app.get('/health', (req, res) => res.json({ ok: true })); + + app.listen(3000); +``` diff --git a/fixtures/zero-change/task-005-tiny-health/reviewer/repo/app.js b/fixtures/zero-change/task-005-tiny-health/reviewer/repo/app.js new file mode 100644 index 0000000..b355cff --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/reviewer/repo/app.js @@ -0,0 +1,6 @@ +const express = require('express'); +const app = express(); + +app.get('/', (req, res) => res.send('Hello')); + +app.listen(3000); diff --git a/fixtures/zero-change/task-005-tiny-health/reviewer/verify.ts b/fixtures/zero-change/task-005-tiny-health/reviewer/verify.ts new file mode 100644 index 0000000..c70fe44 --- /dev/null +++ b/fixtures/zero-change/task-005-tiny-health/reviewer/verify.ts @@ -0,0 +1,27 @@ +import { + verifyReviewer, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyReviewer(ctx, (review) => { + for (const comment of review.comments) { + if (typeof comment.blocking !== "boolean") { + return { + ok: false, + reason: "All comments must have explicit blocking field", + }; + } + } + + if (review.decision === "revise" || review.decision === "reject") { + return { + ok: false, + reason: "Minimal correct implementation should be approved", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-010-no-op/architect/expected.json b/fixtures/zero-change/task-010-no-op/architect/expected.json new file mode 100644 index 0000000..1599e09 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/architect/expected.json @@ -0,0 +1,7 @@ +{ + "highLevelSummary": "The isEven function is already correctly implemented using n % 2 === 0. 
No changes are needed.", + "pagesOrScreens": [], + "apis": [], + "dataModels": [], + "recommendedFileStructure": [] +} diff --git a/fixtures/zero-change/task-010-no-op/architect/prompt.md b/fixtures/zero-change/task-010-no-op/architect/prompt.md new file mode 100644 index 0000000..adc61eb --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/architect/prompt.md @@ -0,0 +1,17 @@ +Inspect the following utility function to verify it works correctly: + +```javascript +function isEven(n) { + return n % 2 === 0; +} +``` + +Requirements: + +- Verify that the implementation is correct +- No changes are needed if it already works properly + +Non-goals: + +- Do **not** add new features +- Do **not** change working code diff --git a/fixtures/zero-change/task-010-no-op/architect/repo/utils.js b/fixtures/zero-change/task-010-no-op/architect/repo/utils.js new file mode 100644 index 0000000..56bec65 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/architect/repo/utils.js @@ -0,0 +1,5 @@ +function isEven(n) { + return n % 2 === 0; +} + +module.exports = { isEven }; diff --git a/fixtures/zero-change/task-010-no-op/architect/verify.ts b/fixtures/zero-change/task-010-no-op/architect/verify.ts new file mode 100644 index 0000000..fdac116 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/architect/verify.ts @@ -0,0 +1,27 @@ +import { + verifyArchitect, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyArchitect(ctx, (spec) => { + const summary = spec.highLevelSummary.toLowerCase(); + + if (!summary.includes("correct") && !summary.includes("no change")) { + return { + ok: false, + reason: "highLevelSummary should acknowledge correctness or no changes needed", + }; + } + + if (spec.recommendedFileStructure.length > 0) { + return { + ok: false, + reason: "recommendedFileStructure should be empty for no-op scenario", + }; + } + + return { ok: true }; + }); +} diff --git 
a/fixtures/zero-change/task-010-no-op/coder/expected.patch b/fixtures/zero-change/task-010-no-op/coder/expected.patch new file mode 100644 index 0000000..8dc52dc --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/coder/expected.patch @@ -0,0 +1,5 @@ +diff --git a/utils.js b/utils.js +index 0000000..0000000 100644 +--- a/utils.js ++++ b/utils.js +@@ -0,0 +1,0 @@ diff --git a/fixtures/zero-change/task-010-no-op/coder/prompt.md b/fixtures/zero-change/task-010-no-op/coder/prompt.md new file mode 100644 index 0000000..22ca9f9 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/coder/prompt.md @@ -0,0 +1,3 @@ +Task: The implementation is already correct. No changes required. + +Generate a patch. diff --git a/fixtures/zero-change/task-010-no-op/coder/verify.ts b/fixtures/zero-change/task-010-no-op/coder/verify.ts new file mode 100644 index 0000000..d8871c5 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/coder/verify.ts @@ -0,0 +1,21 @@ +import { + verifyCoder, + type VerifyCtx, + type VerifyResult, +} from "@kit/fixture-helpers"; + +export function verify(ctx: VerifyCtx): VerifyResult { + return verifyCoder(ctx, (patch) => { + const trimmedPatch = patch.trim(); + + // Patch should be effectively empty (no real changes) + if (trimmedPatch.length > 200) { + return { + ok: false, + reason: "Patch should be minimal/empty for no-op scenario", + }; + } + + return { ok: true }; + }); +} diff --git a/fixtures/zero-change/task-010-no-op/planner/expected.json b/fixtures/zero-change/task-010-no-op/planner/expected.json new file mode 100644 index 0000000..aca5001 --- /dev/null +++ b/fixtures/zero-change/task-010-no-op/planner/expected.json @@ -0,0 +1,14 @@ +{ + "tasks": [ + { + "id": "no-changes-needed", + "description": "The implementation is already correct. 
No changes required.",
+      "file": "",
+      "type": "unknown",
+      "complexity": "low",
+      "dependsOn": []
+    }
+  ],
+  "ambiguities": [],
+  "invalidTaskCount": 0
+}
diff --git a/fixtures/zero-change/task-010-no-op/planner/prompt.md b/fixtures/zero-change/task-010-no-op/planner/prompt.md
new file mode 100644
index 0000000..7858756
--- /dev/null
+++ b/fixtures/zero-change/task-010-no-op/planner/prompt.md
@@ -0,0 +1,3 @@
+The architect has determined that the isEven function is already correctly implemented. No changes are needed.
+
+Create a task plan.
diff --git a/fixtures/zero-change/task-010-no-op/planner/verify.ts b/fixtures/zero-change/task-010-no-op/planner/verify.ts
new file mode 100644
index 0000000..95f183a
--- /dev/null
+++ b/fixtures/zero-change/task-010-no-op/planner/verify.ts
@@ -0,0 +1,33 @@
+import {
+  verifyPlanner,
+  type VerifyCtx,
+  type VerifyResult,
+} from "@kit/fixture-helpers";
+
+/**
+ * Verifies the planner output for the zero-change (no-op) scenario.
+ *
+ * Delegates to `verifyPlanner`; the scenario check accepts an empty plan, or a
+ * plan whose single task has type "unknown" or an empty `file`.
+ *
+ * @param ctx - Verification context forwarded unchanged to `verifyPlanner`.
+ * @returns The result produced by `verifyPlanner`.
+ */
+export function verify(ctx: VerifyCtx): VerifyResult {
+  return verifyPlanner(ctx, ({ tasks }) => {
+    const isEmptyPlan = tasks.length === 0;
+
+    // Either marker is enough on its own: the two conditions are OR-ed.
+    const isSinglePlaceholder =
+      tasks.length === 1 && (tasks[0].type === "unknown" || tasks[0].file === "");
+
+    if (!isEmptyPlan && !isSinglePlaceholder) {
+      return {
+        ok: false,
+        reason: "No-op scenario should have zero tasks or single unknown task with no file target",
+      };
+    }
+
+    return { ok: true };
+  });
+}
diff --git a/fixtures/zero-change/task-010-no-op/reviewer/expected.json b/fixtures/zero-change/task-010-no-op/reviewer/expected.json
new file mode 100644
index 0000000..548d526
--- /dev/null
+++ b/fixtures/zero-change/task-010-no-op/reviewer/expected.json
@@ -0,0 +1,9 @@
+{
+  "decision": "approve",
+  "comments": [
+    {
+      "message": "Correctly identified that no changes are needed.
The existing implementation is already correct.",
+      "blocking": false
+    }
+  ]
+}
diff --git a/fixtures/zero-change/task-010-no-op/reviewer/prompt.md b/fixtures/zero-change/task-010-no-op/reviewer/prompt.md
new file mode 100644
index 0000000..bce29ac
--- /dev/null
+++ b/fixtures/zero-change/task-010-no-op/reviewer/prompt.md
@@ -0,0 +1,11 @@
+Review the following patch:
+
+```diff
+diff --git a/utils.js b/utils.js
+index 0000000..0000000 100644
+--- a/utils.js
++++ b/utils.js
+@@ -0,0 +1,0 @@
+```
+
+Context: The implementation was already correct, so no changes are needed.
diff --git a/fixtures/zero-change/task-010-no-op/reviewer/verify.ts b/fixtures/zero-change/task-010-no-op/reviewer/verify.ts
new file mode 100644
index 0000000..9c802bb
--- /dev/null
+++ b/fixtures/zero-change/task-010-no-op/reviewer/verify.ts
@@ -0,0 +1,52 @@
+import {
+  verifyReviewer,
+  type VerifyCtx,
+  type VerifyResult,
+} from "@kit/fixture-helpers";
+
+/**
+ * Verifies the reviewer output for the zero-change (no-op) scenario.
+ *
+ * Delegates to `verifyReviewer` and adds three scenario-specific checks:
+ * every comment carries a boolean `blocking` flag, the decision is not
+ * "reject", and any comment text acknowledges correctness or no change.
+ *
+ * @param ctx - Verification context forwarded unchanged to `verifyReviewer`.
+ * @returns The result produced by `verifyReviewer`.
+ */
+export function verify(ctx: VerifyCtx): VerifyResult {
+  return verifyReviewer(ctx, (review) => {
+    for (const comment of review.comments) {
+      if (typeof comment.blocking !== "boolean") {
+        return {
+          ok: false,
+          reason: "All comments must have explicit blocking field",
+        };
+      }
+    }
+
+    if (review.decision === "reject") {
+      return {
+        ok: false,
+        reason: "Should not reject when correctly identifying no changes needed",
+      };
+    }
+
+    const commentText = review.comments.map((c) => c.message.toLowerCase()).join(" ");
+
+    // Match "correct" only at the start of a word so that "incorrect" does not
+    // count as an acknowledgement ("correctly" and "correctness" still do).
+    const acknowledgesCorrectness = /\bcorrect/.test(commentText);
+    const acknowledgesNoChange = commentText.includes("no change");
+
+    // Empty comment text (for example, no comments at all) is accepted.
+    if (commentText && !acknowledgesCorrectness && !acknowledgesNoChange) {
+      return {
+        ok: false,
+        reason: "Comments should acknowledge correctness or no changes needed",
+      };
+    }
+
+    return { ok: true };
+  });
+}