diff --git a/.github/agents/opinionated-cli-simulator-tester.agent.md b/.github/agents/opinionated-cli-simulator-tester.agent.md new file mode 100644 index 0000000..1077c3c --- /dev/null +++ b/.github/agents/opinionated-cli-simulator-tester.agent.md @@ -0,0 +1,63 @@ +--- +name: opinionated-cli-simulator-tester +description: Opinionated end-user CLI test specialist for Planeteer. Use when validating TUI behavior, keyboard flows, regressions, and UX quality by running simulator scripts and reporting concrete findings with asciinema artifacts. +tools: ['execute', 'read', 'search', 'todo'] +user-invokable: true +--- + +# Opinionated CLI Simulator Tester + +You are an opinionated, detail-oriented user who tests this CLI like a real frustrated power user. Use real commands and look for edge cases. + +Be direct and critical, but always back claims with reproducible evidence. + +IMPORTANT: Use simulator mode of the execute tool to run scripted CLI sessions. Use the `asciinema-terminal-recorder` skill for terminal recording evidence, and focus on UX quality, not just functional correctness. + +## Test workflow + +1. Build first: + ```bash + npm run build + ``` +2. Run simulator-focused regression tests: + ```bash + npm test -- src/screens/cli.integration.test.tsx + ``` + If that command fails because of npm arg parsing, run: + ```bash + npx vitest run src/screens/cli.integration.test.tsx + ``` +3. Run scripted simulator sessions for the exact flow under test: + ```bash + node dist/index.js simulate /tmp/sim-script.json > /tmp/sim-output.txt + ``` +4. Inspect frame output (`---FRAME---` separators) for UX problems: + - broken navigation flow + - confusing or missing status hints + - clipped/truncated text + - unexpected screen transitions +5. Capture evidence for findings using terminal-native artifacts: + - Save frame extracts to a markdown/text artifact and cite exact frame snippets. 
+ - Use `skills/asciinema-terminal-recorder/scripts/record_ui_session.sh` to generate `.cast` recordings for each reproduced issue. + - Replay recordings with `asciinema play` before reporting to verify the artifact matches the claim. + +## Persona requirements + +- Behave like a skeptical user who expects polished UX. +- Call out awkward interactions, not just hard failures. +- Do not soften findings with vague wording. +- Never mark behavior as passing without evidence from simulator output. + +## Output format + +Return findings in this format: + +1. **Overall verdict**: pass/fail with one-sentence rationale. +2. **Findings table** with columns: + - Severity (`critical`, `major`, `minor`, `nit`) + - Screen/flow + - Reproduction input + - Expected vs actual + - Evidence (frame artifact path and/or terminal recording path) +3. **Recommended fixes**: concrete, prioritized actions. +4. **Confidence**: high/medium/low and why. diff --git a/.github/skills/not-a-skill.txt b/.github/skills/not-a-skill.txt new file mode 100644 index 0000000..592fd25 --- /dev/null +++ b/.github/skills/not-a-skill.txt @@ -0,0 +1 @@ +ignore me diff --git a/.github/skills/skill1.yaml b/.github/skills/skill1.yaml new file mode 100644 index 0000000..8737a24 --- /dev/null +++ b/.github/skills/skill1.yaml @@ -0,0 +1 @@ +name: skill1 diff --git a/.github/skills/skill2.yml b/.github/skills/skill2.yml new file mode 100644 index 0000000..93aaab0 --- /dev/null +++ b/.github/skills/skill2.yml @@ -0,0 +1 @@ +name: skill2 diff --git a/README.md b/README.md index 9987d3b..4a59f46 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,123 @@ planeteer list | `↑` `↓` | Navigate task list | | `⏎` | Submit input / proceed to next screen | | `Esc` | Go back | +| `⇥` | Toggle view (Tree / Batches / Skills) | +| `Space` | Toggle skill on/off (Skills view) | +| `/` | Command mode (refine screen) | | `s` | Save plan (refine screen) | | `x` | Start execution (refine/execute screen) | | `q` | Quit | +## Custom 
Copilot Skills + +Planeteer supports custom Copilot skills for domain-specific planning. Skills help Copilot generate better work breakdowns by providing context about specific project types. + +### Using Skills + +Skills are automatically loaded from the `.github/skills/` directory. On first run, this directory is created with example skills. To use skills: + +1. View active skills in the **Refine** screen by pressing `⇥` to cycle to the Skills view +2. Use `↑`/`↓` to navigate and `Space` to toggle skills on/off +3. Skills are applied during work breakdown generation and refinement + +### Creating Skills + +Create a new YAML file in `.github/skills/` with this structure: + +```yaml +name: my-custom-skill +description: Brief description of what this skill helps with + +instructions: | + When planning this type of project, follow these guidelines: + + 1. **Category 1**: Guidelines for this aspect + - Specific point 1 + - Specific point 2 + + 2. **Category 2**: More guidelines + - Another point + - Another point + + General advice about task structure, dependencies, etc. + +examples: + - input: "Example project description" + tasks: + - Task 1 that would be generated + - Task 2 that would be generated + - Task 3 that would be generated +``` + +### Skill Examples + +**Example 1: Web Application Skill** + +```yaml +name: web-app +description: Expert in web application development + +instructions: | + Break down web projects into frontend, backend, database, and deployment: + + 1. **Frontend**: Component structure, routing, state management + 2. **Backend**: API design, business logic, authentication + 3. **Database**: Schema design, migrations, seed data + 4. **Infrastructure**: CI/CD, containerization, cloud deployment + + Maximize parallelism between frontend and backend work. 
+ +examples: + - input: "Build a task management web app" + tasks: + - Setup React frontend with TypeScript + - Design REST API for task CRUD + - Implement PostgreSQL schema + - Add JWT authentication + - Deploy to cloud platform +``` + +**Example 2: Data Pipeline Skill** + +```yaml +name: data-pipeline +description: Expert in ETL and data processing workflows + +instructions: | + Structure data pipelines with these phases: + + 1. **Extraction**: Data sources, connectors, scheduling + 2. **Transformation**: Cleaning, validation, enrichment + 3. **Loading**: Destination setup, batch vs streaming + 4. **Monitoring**: Logging, alerts, data quality checks + + Consider idempotency, error handling, and reprocessing. + +examples: + - input: "Build ETL pipeline from API to data warehouse" + tasks: + - Implement API data extractor + - Create transformation functions + - Setup data warehouse schema + - Add error handling and retries + - Configure monitoring and alerts +``` + +### Skill Best Practices + +- **One skill per domain**: Create focused skills (e.g., `mobile-app`, `ml-pipeline`) rather than generic ones +- **Clear instructions**: Be specific about task breakdown patterns and dependencies +- **Provide examples**: Include 2-3 representative examples with typical task structures +- **Enable selectively**: Toggle skills on/off based on your current project type + +### Built-in Example + +Two example skills are included in the repository to help you get started: +- **example-web-app-skill.yaml** - Web application development best practices +- **example-data-pipeline-skill.yaml** - ETL and data processing workflow patterns + +These files are automatically available in `.github/skills/` and can be used as templates for creating your own custom skills. 
+ ## Development ### Build & Run @@ -157,6 +270,30 @@ Plans are saved to `.planeteer/` in the current working directory: - `.json` — Machine-readable plan (used by the app) - `.md` — Human-readable Markdown export +#### Session Persistence and Recovery + +Planeteer includes robust session persistence to handle interrupted executions: + +**Automatic Session Tracking** +- Each task execution creates a Copilot SDK session +- Session IDs are stored in the plan JSON and saved incrementally after each task completes or fails +- If the app crashes or is interrupted (Ctrl+C), sessions remain active in the Copilot CLI + +**Orphaned Session Detection** +- When loading a plan, Planeteer detects tasks that were interrupted (status: `in_progress` with session IDs) +- It queries the Copilot SDK to find any sessions still active for those tasks +- If orphaned sessions are found, you'll see a recovery prompt with options: + 1. **Mark as interrupted and continue** — Keeps sessions alive for debugging + 2. **Mark as interrupted and cleanup sessions** (recommended) — Cleans up orphaned sessions + 3. 
**Cleanup sessions and go back** — Cleans up and returns to the refine screen + +**Task Statuses** +- `pending` — Not yet started +- `in_progress` — Currently executing +- `done` — Completed successfully +- `failed` — Execution failed (can be retried with `r`) +- `interrupted` — Was in progress when execution was interrupted + ## Project Structure ``` @@ -176,7 +313,8 @@ src/ │ ├── copilot.ts # Copilot SDK wrapper (single point of contact) │ ├── planner.ts # Prompt engineering for planning │ ├── executor.ts # DAG-aware parallel task dispatch -│ └── persistence.ts # JSON/Markdown save & load +│ ├── persistence.ts # JSON/Markdown save & load +│ └── session-recovery.ts # Orphaned session detection & cleanup ├── models/ │ └── plan.ts # Types: Plan, Task, ChatMessage └── utils/ diff --git a/package-lock.json b/package-lock.json index 90ab401..9094525 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,7 @@ "name": "planeteer", "version": "0.1.0", "dependencies": { - "@github/copilot-sdk": "^0.1.0", + "@github/copilot-sdk": "^0.1.24", "ink": "^5.1.0", "ink-select-input": "^6.0.0", "ink-spinner": "^5.0.0", @@ -642,26 +642,26 @@ } }, "node_modules/@github/copilot": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-0.0.403.tgz", - "integrity": "sha512-v5jUdtGJReLmE1rmff/LZf+50nzmYQYAaSRNtVNr9g0j0GkCd/noQExe31i1+PudvWU0ZJjltR0B8pUfDRdA9Q==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-0.0.411.tgz", + "integrity": "sha512-I3/7gw40Iu1O+kTyNPKJHNqDRyOebjsUW6wJsvSVrOpT0TNa3/lfm8xdS2XUuJWkp+PgEG/PRwF7u3DVNdP7bQ==", "license": "SEE LICENSE IN LICENSE.md", "bin": { "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "0.0.403", - "@github/copilot-darwin-x64": "0.0.403", - "@github/copilot-linux-arm64": "0.0.403", - "@github/copilot-linux-x64": "0.0.403", - "@github/copilot-win32-arm64": "0.0.403", - "@github/copilot-win32-x64": "0.0.403" + 
"@github/copilot-darwin-arm64": "0.0.411", + "@github/copilot-darwin-x64": "0.0.411", + "@github/copilot-linux-arm64": "0.0.411", + "@github/copilot-linux-x64": "0.0.411", + "@github/copilot-win32-arm64": "0.0.411", + "@github/copilot-win32-x64": "0.0.411" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-0.0.403.tgz", - "integrity": "sha512-dOw8IleA0d1soHnbr/6wc6vZiYWNTKMgfTe/NET1nCfMzyKDt/0F0I7PT5y+DLujJknTla/ZeEmmBUmliTW4Cg==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-0.0.411.tgz", + "integrity": "sha512-dtr+iHxTS4f8HlV2JT9Fp0FFoxuiPWCnU3XGmrHK+rY6bX5okPC2daU5idvs77WKUGcH8yHTZtfbKYUiMxKosw==", "cpu": [ "arm64" ], @@ -675,9 +675,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-0.0.403.tgz", - "integrity": "sha512-aK2jSNWgY8eiZ+TmrvGhssMCPDTKArc0ip6Ul5OaslpytKks8hyXoRbxGD0N9sKioSUSbvKUf+1AqavbDpJO+w==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-0.0.411.tgz", + "integrity": "sha512-zhdbQCbPi1L4iHClackSLx8POfklA+NX9RQLuS48HlKi/0KI/JlaDA/bdbIeMR79wjif5t9gnc/m+RTVmHlRtA==", "cpu": [ "x64" ], @@ -691,9 +691,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-0.0.403.tgz", - "integrity": "sha512-KhoR2iR70O6vCkzf0h8/K+p82qAgOvMTgAPm9bVEHvbdGFR7Py9qL5v03bMbPxsA45oNaZAkzDhfTAqWhIAZsQ==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-0.0.411.tgz", + "integrity": "sha512-oZYZ7oX/7O+jzdTUcHkfD1A8YnNRW6mlUgdPjUg+5rXC43bwIdyatAnc0ObY21m9h8ghxGqholoLhm5WnGv1LQ==", "cpu": [ "arm64" ], @@ -707,9 +707,9 @@ } }, 
"node_modules/@github/copilot-linux-x64": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-0.0.403.tgz", - "integrity": "sha512-eoswUc9vo4TB+/9PgFJLVtzI4dPjkpJXdCsAioVuoqPdNxHxlIHFe9HaVcqMRZxUNY1YHEBZozy+IpUEGjgdfQ==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-0.0.411.tgz", + "integrity": "sha512-nnXrKANmmGnkwa3ROlKdAhVNOx8daeMSE8Xh0o3ybKckFv4s38blhKdcxs0RJQRxgAk4p7XXGlDDKNRhurqF1g==", "cpu": [ "x64" ], @@ -723,23 +723,23 @@ } }, "node_modules/@github/copilot-sdk": { - "version": "0.1.22", - "resolved": "https://registry.npmjs.org/@github/copilot-sdk/-/copilot-sdk-0.1.22.tgz", - "integrity": "sha512-ZGOEBmYOfu/vLXKjjoiw4lO3Cb8QBUuAWXcW/qzmPPsM9+Qe00qVr2AuDTU/Gft9Dm/yZcPK2QuTZc7LVeom9w==", + "version": "0.1.25", + "resolved": "https://registry.npmjs.org/@github/copilot-sdk/-/copilot-sdk-0.1.25.tgz", + "integrity": "sha512-hIgYLPXzWw9bNgrsD5BLKmgVH20ow5Or5UyVXfVe3YgeiaTgFxC4jWSAVHLGB6ufHZUrvbjppcq2dWK63FmDRA==", "license": "MIT", "dependencies": { - "@github/copilot": "^0.0.403", + "@github/copilot": "^0.0.411", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, "engines": { - "node": ">=18.0.0" + "node": ">=20.0.0" } }, "node_modules/@github/copilot-win32-arm64": { - "version": "0.0.403", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-0.0.403.tgz", - "integrity": "sha512-djWjzCsp2xPNafMyOZ/ivU328/WvWhdroGie/DugiJBTgQL2SP0quWW1fhTlDwE81a3g9CxfJonaRgOpFTJTcg==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-0.0.411.tgz", + "integrity": "sha512-h+Bovb2YVCQSeELZOO7zxv8uht45XHcvAkFbRsc1gf9dl109sSUJIcB4KAhs8Aznk28qksxz7kvdSgUWyQBlIA==", "cpu": [ "arm64" ], @@ -753,9 +753,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "0.0.403", - "resolved": 
"https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-0.0.403.tgz", - "integrity": "sha512-lju8cHy2E6Ux7R7tWyLZeksYC2MVZu9i9ocjiBX/qfG2/pNJs7S5OlkwKJ0BSXSbZEHQYq7iHfEWp201bVfk9A==", + "version": "0.0.411", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-0.0.411.tgz", + "integrity": "sha512-xmOgi1lGvUBHQJWmq5AK1EP95+Y8xR4TFoK9OCSOaGbQ+LFcX2jF7iavnMolfWwddabew/AMQjsEHlXvbgMG8Q==", "cpu": [ "x64" ], @@ -1215,7 +1215,6 @@ "integrity": "sha512-BkmoP5/FhRYek5izySdkOneRyXYN35I860MFAGupTdebyE66uZaR+bXLHq8k4DirE5DwQi3NuhvRU1jqTVwUrQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -1233,7 +1232,6 @@ "integrity": "sha512-z9VXpC7MWrhfWipitjNdgCauoMLRdIILQsAEV+ZesIzBq/oUlxk0m3ApZuMFCXdnS4U7KrI+l3WRUEGQ8K1QKw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.2.2" @@ -1360,7 +1358,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1833,7 +1830,6 @@ "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -2224,7 +2220,6 @@ "resolved": "https://registry.npmjs.org/ink/-/ink-5.2.1.tgz", "integrity": "sha512-BqcUyWrG9zq5HIwW6JcfFHsIYebJkWWb4fczNah1goUO0vv5vneIlfwuS85twyJ5hYR/y18FlAYUxrO9ChIWVg==", "license": "MIT", - "peer": true, "dependencies": { "@alcalzone/ansi-tokenize": "^0.1.3", "ansi-escapes": "^7.0.0", @@ -2730,7 +2725,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -2811,7 +2805,6 @@ "resolved": 
"https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, diff --git a/package.json b/package.json index f6e9673..f1b17cf 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "test:watch": "vitest" }, "dependencies": { - "@github/copilot-sdk": "^0.1.0", + "@github/copilot-sdk": "^0.1.24", "ink": "^5.1.0", "ink-select-input": "^6.0.0", "ink-spinner": "^5.0.0", diff --git a/skills/asciinema-terminal-recorder/SKILL.md b/skills/asciinema-terminal-recorder/SKILL.md new file mode 100644 index 0000000..bbd8ec0 --- /dev/null +++ b/skills/asciinema-terminal-recorder/SKILL.md @@ -0,0 +1,84 @@ +--- +name: asciinema-terminal-recorder +description: Record deterministic terminal sessions with asciinema for CLI/UI testing and demos. Use when Claude needs to capture terminal interactions as .cast files, replay recordings, or run scriptable command-based recordings (Playwright-style artifacts for terminals). +--- + +# Asciinema Terminal Recorder + +Create reproducible terminal recordings for UI testing and CLI demos using asciinema. + +## Quick workflow + +1. Choose scripted mode for deterministic test artifacts (recommended). +2. Record to a `.cast` file. +3. Replay with `asciinema play` to verify output and timing. 
+ +## Scripted recording (recommended) + +Use the bundled helper script for stable, automation-friendly captures: + +```bash +scripts/record_ui_session.sh <output.cast> "<command>" +``` + +Example: + +```bash +scripts/record_ui_session.sh artifacts/login-flow.cast "npm run test:cli-smoke" +``` + +Defaults applied by the script: + +- `--headless` for non-interactive capture +- `--window-size 120x30` for consistent terminal layout +- `--idle-time-limit 1.0` to avoid long pauses +- `--overwrite` to refresh existing artifacts +- `--return` so failures propagate to callers +- timestamped default title in recording metadata (UTC) + +Override defaults with env vars before running: + +```bash +ASCIINEMA_WINDOW_SIZE=100x28 ASCIINEMA_IDLE_LIMIT=0.5 scripts/record_ui_session.sh out.cast "pnpm test" +``` + +## Interactive recording + +For exploratory/manual sessions: + +```bash +asciinema rec demo.cast +``` + +End recording with `Ctrl+D` or `exit`. + +For command-only recording without helper script: + +```bash +asciinema rec --command "htop" demo.cast +``` + +## Validate recordings + +Replay locally: + +```bash +asciinema play demo.cast +``` + +## Screen captures + markdown report (timestamped) + +For simulator output (`---FRAME---` separated), generate capture files and a report: + +```bash +python3 scripts/generate_ui_report.py /tmp/sim-output.txt --output-dir artifacts --prefix simulator-ui +``` + +Output artifacts include: + +- `artifacts/simulator-ui-<timestamp>.md` (report with UTC date-time) +- `artifacts/simulator-ui-<...>-capture-XX-frame-YYYY.txt` (screen captures) + +Use `--max-captures` to control how many captures are embedded in the report. + +Read `references/asciinema-ui-testing.md` for command options and troubleshooting patterns. 
diff --git a/skills/asciinema-terminal-recorder/references/asciinema-ui-testing.md b/skills/asciinema-terminal-recorder/references/asciinema-ui-testing.md new file mode 100644 index 0000000..1d04b07 --- /dev/null +++ b/skills/asciinema-terminal-recorder/references/asciinema-ui-testing.md @@ -0,0 +1,56 @@ +# Asciinema UI Testing Reference + +## Core commands + +- Record interactive shell: `asciinema rec demo.cast` +- Record one command: `asciinema rec --command "npm test" demo.cast` +- Replay: `asciinema play demo.cast` +- Upload (optional): `asciinema upload demo.cast` + +## Recording flags useful for test artifacts + +- `--window-size COLSxROWS`: Fixes terminal dimensions for consistent layout comparisons. +- `--idle-time-limit SECS`: Caps long pauses during playback. +- `--headless`: Records without attaching to current terminal UI. +- `--return`: Exits with the recorded command's status code. +- `--overwrite`: Replaces previous artifacts safely in scripted runs. +- `--capture-input`: Includes keystrokes (avoid for sensitive input). + +## Suggested deterministic recipe + +```bash +asciinema rec \ + --overwrite \ + --headless \ + --return \ + --window-size 120x30 \ + --idle-time-limit 1.0 \ + --command "npm run test:cli-smoke" \ + artifacts/smoke.cast +``` + +## Timestamped screen-capture reports + +Generate a markdown report from simulator output: + +```bash +python3 scripts/generate_ui_report.py artifacts/sim-output.txt --output-dir artifacts --prefix simulator-ui +``` + +This writes: + +- `simulator-ui-<timestamp>.md` with generated UTC date-time metadata +- `simulator-ui-<timestamp>-capture-*.txt` with extracted screen captures + +Useful options: + +- `--max-captures 8` to include more capture points +- `--lines-per-capture 60` to include larger frame excerpts +- `--output-dir <dir>` to separate artifacts by run + +## Troubleshooting + +- **Recording hangs**: Ensure the command exits; in command mode recording ends when the command exits. 
+- **Playback wraps unexpectedly**: Increase `--window-size` or replay in a terminal with equal/larger dimensions. +- **Need cleaner pacing**: Lower `--idle-time-limit` (for example `0.5`) for faster, denser playback. +- **Report shows only one capture**: Ensure source file has content and includes `---FRAME---` separators when using simulator output. diff --git a/skills/asciinema-terminal-recorder/scripts/generate_ui_report.py b/skills/asciinema-terminal-recorder/scripts/generate_ui_report.py new file mode 100755 index 0000000..361a798 --- /dev/null +++ b/skills/asciinema-terminal-recorder/scripts/generate_ui_report.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +"""Generate timestamped markdown UI reports with screen-capture artifacts.""" + +from __future__ import annotations + +import argparse +from datetime import datetime, timezone +from pathlib import Path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Create a markdown report and capture files from terminal output." 
+ ) + parser.add_argument("input_file", help="Path to simulator/replay output text file") + parser.add_argument( + "--output-dir", + default=".", + help="Directory where report and capture artifacts are written", + ) + parser.add_argument( + "--prefix", + default="ui-report", + help="Artifact filename prefix (default: ui-report)", + ) + parser.add_argument( + "--max-captures", + type=int, + default=5, + help="Maximum number of screen captures to include (default: 5)", + ) + parser.add_argument( + "--lines-per-capture", + type=int, + default=40, + help="Maximum lines to embed per capture in markdown (default: 40)", + ) + return parser.parse_args() + + +def split_frames(raw_text: str) -> list[str]: + if "---FRAME---" in raw_text: + return [frame.strip("\n") for frame in raw_text.split("\n---FRAME---\n")] + return [raw_text.strip("\n")] + + +def pick_indices(total: int, max_captures: int) -> list[int]: + if total <= 0: + return [] + if max_captures <= 1 or total == 1: + return [0] + if total <= max_captures: + return list(range(total)) + + picks = { + round((i * (total - 1)) / (max_captures - 1)) + for i in range(max_captures) + } + return sorted(picks) + + +def main() -> int: + args = parse_args() + input_path = Path(args.input_file).resolve() + if not input_path.exists(): + raise FileNotFoundError(f"Input file not found: {input_path}") + + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + now = datetime.now(timezone.utc) + timestamp = now.strftime("%Y%m%dT%H%M%SZ") + generated_at = now.isoformat() + + raw_text = input_path.read_text(encoding="utf-8") + frames = [f for f in split_frames(raw_text) if f.strip()] + if not frames: + frames = ["(No non-empty terminal output captured)"] + + indices = pick_indices(len(frames), max(1, args.max_captures)) + report_path = output_dir / f"{args.prefix}-{timestamp}.md" + + capture_entries: list[tuple[int, Path, str]] = [] + for capture_no, frame_idx in enumerate(indices, start=1): + 
frame_text = frames[frame_idx] + capture_path = output_dir / ( + f"{args.prefix}-{timestamp}-capture-{capture_no:02d}-frame-{frame_idx:04d}.txt" + ) + capture_path.write_text(frame_text + "\n", encoding="utf-8") + preview = "\n".join(frame_text.splitlines()[: max(1, args.lines_per_capture)]) + capture_entries.append((frame_idx, capture_path, preview)) + + lines: list[str] = [ + "# Terminal UI Test Report", + "", + f"- Generated at (UTC): `{generated_at}`", + f"- Source input: `{input_path}`", + f"- Total frames detected: `{len(frames)}`", + f"- Captures included: `{len(capture_entries)}`", + "", + "## Screen Captures", + "", + ] + + for idx, capture_path, preview in capture_entries: + lines.extend( + [ + f"### Frame {idx}", + f"- Capture file: `{capture_path}`", + "", + "```text", + preview, + "```", + "", + ] + ) + + report_path.write_text("\n".join(lines), encoding="utf-8") + print(report_path) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/asciinema-terminal-recorder/scripts/record_ui_session.sh b/skills/asciinema-terminal-recorder/scripts/record_ui_session.sh new file mode 100755 index 0000000..3922cfb --- /dev/null +++ b/skills/asciinema-terminal-recorder/scripts/record_ui_session.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Record one command to a .cast file with asciinema; size, idle limit and title are overridable via ASCIINEMA_* env vars. +set -euo pipefail + +if [[ $# -lt 2 ]]; then + echo "Usage: $0 <output.cast> \"<command>\"" >&2 + exit 1 +fi + +output_file="$1" +shift +command_to_record="$*" + +window_size="${ASCIINEMA_WINDOW_SIZE:-120x30}" +idle_limit="${ASCIINEMA_IDLE_LIMIT:-1.0}" +timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +title="${ASCIINEMA_TITLE:-UI Test Recording ${timestamp}}" + +asciinema rec \ + --overwrite \ + --headless \ + --return \ + --quiet \ + --window-size "$window_size" \ + --idle-time-limit "$idle_limit" \ + --title "$title" \ + --command "$command_to_record" \ + "$output_file" diff --git a/src/index.tsx b/src/index.tsx index f4f017b..bf99979 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -1,16 +1,19 @@ #!/usr/bin/env node import
React from 'react'; +import { readFile } from 'node:fs/promises'; import { render } from 'ink'; import App from './app.js'; import type { Screen } from './models/plan.js'; import { listPlans } from './services/persistence.js'; -import { loadModelPreference } from './services/copilot.js'; +import { simulateSession } from './services/simulator.js'; +import { loadModelPreference, ensureSkillsDirectory } from './services/copilot.js'; const args = process.argv.slice(2); const command = args[0] || 'home'; async function main(): Promise<void> { await loadModelPreference(); + await ensureSkillsDirectory(); if (command === 'list') { const plans = await listPlans(); @@ -25,6 +28,44 @@ async function main(): Promise<void> { return; } + // Headless mode: replay the scripted steps from a JSON file and print the captured frames joined by ---FRAME---. + if (command === 'simulate') { + if (!args[1]) { + console.error('Usage: planeteer simulate <script.json>'); + process.exit(1); + } + + let script: { + initialScreen?: Screen; + initialPlanId?: string; + steps: { input: string; waitMs?: number }[]; + width?: number; + height?: number; + settleMs?: number; + }; + try { + script = JSON.parse(await readFile(args[1], 'utf8')) as typeof script; + } catch (err) { + throw new Error(`Cannot load simulation script file ${args[1]}: ${err instanceof Error ? err.message : String(err)}`); + } + + const result = await simulateSession( + React.createElement(App, { + initialScreen: script.initialScreen, + initialPlanId: script.initialPlanId, + }), + { + steps: script.steps, + width: script.width, + height: script.height, + settleMs: script.settleMs, + }, + ); + + process.stdout.write(result.frames.join('\n---FRAME---\n')); + return; + } + let initialScreen: Screen = 'welcome'; let initialPlanId: string | undefined; diff --git a/src/models/plan.ts b/src/models/plan.ts index d015e8a..54a03a9 100644 --- a/src/models/plan.ts +++ b/src/models/plan.ts @@ -1,4 +1,4 @@ -export type TaskStatus = 'pending' | 'in_progress' | 'done' | 'failed'; +export type TaskStatus = 'pending' | 'in_progress' | 'done' | 'failed' | 'interrupted'; export interface Task { id: string; @@ -8,6 +8,12 @@ export interface
Task { dependsOn: string[]; status: TaskStatus; agentResult?: string; + sessionId?: string; +} + +export interface SkillConfig { + name: string; + enabled: boolean; } export interface Plan { @@ -17,6 +23,7 @@ export interface Plan { createdAt: string; updatedAt: string; tasks: Task[]; + skills?: SkillConfig[]; } export interface ChatMessage { diff --git a/src/screens/breakdown.tsx b/src/screens/breakdown.tsx index aa2c0a5..0956b06 100644 --- a/src/screens/breakdown.tsx +++ b/src/screens/breakdown.tsx @@ -1,8 +1,9 @@ import React, { useState, useEffect } from 'react'; import { Box, Text, useInput } from 'ink'; -import type { Plan, Task, ChatMessage } from '../models/plan.js'; +import type { Plan, Task, ChatMessage, SkillConfig } from '../models/plan.js'; import { createPlan } from '../models/plan.js'; import { generateWBS } from '../services/planner.js'; +import { getSkillOptions, loadSkillConfigs } from '../services/copilot.js'; import { detectCycles, computeBatches } from '../utils/dependency-graph.js'; import TaskTree from '../components/task-tree.js'; import BatchView from '../components/batch-view.js'; @@ -37,6 +38,7 @@ export default function BreakdownScreen({ const [viewMode, setViewMode] = useState('tree'); const [attempt, setAttempt] = useState(0); const [streamText, setStreamText] = useState(''); + const [skillConfigs, setSkillConfigs] = useState<SkillConfig[]>([]); useEffect(() => { if (existingPlan) return; @@ -44,9 +46,25 @@ export default function BreakdownScreen({ setLoading(true); setError(null); setStreamText(''); - generateWBS(scopeDescription, (_delta, fullText) => { - setStreamText(fullText); - }, 2, codebaseContext || undefined) + + // Load skills and generate WBS. The loaded configs are kept in a local because the + // `skillConfigs` state captured by this effect still holds its initial value when the plan is built. + let loadedSkills: SkillConfig[] = []; + Promise.all([getSkillOptions(), loadSkillConfigs()]) + .then(([skillOptions, skills]) => { + loadedSkills = skills; + setSkillConfigs(skills); + return generateWBS( + scopeDescription, + (_delta, fullText) => { + setStreamText(fullText); + }, + 2, + codebaseContext || undefined, + skillOptions, + skills + ); + }) 
.then((tasks) => { // Use first line only, strip markdown bold markers, and cap length const planName = scopeDescription @@ -59,6 +77,7 @@ export default function BreakdownScreen({ name: planName, description: scopeDescription, tasks, + skills: loadedSkills, }); setPlan(newPlan); setLoading(false); diff --git a/src/screens/cli.integration.test.tsx b/src/screens/cli.integration.test.tsx new file mode 100644 index 0000000..2ccb328 --- /dev/null +++ b/src/screens/cli.integration.test.tsx @@ -0,0 +1,305 @@ +import React from 'react'; +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import type { Plan } from '../models/plan.js'; +import { simulateSession } from '../services/simulator.js'; +import HomeScreen from './home.js'; +import BreakdownScreen from './breakdown.js'; +import RefineScreen from './refine.js'; +import ExecuteScreen from './execute.js'; +import ValidateScreen from './validate.js'; + +vi.mock('ink-select-input', () => ({ + default: function SelectInputMock({ + items, + onHighlight, + }: { + items: Array<{ value: string }>; + onHighlight?: (item: { value: string }) => void; + }): React.ReactElement | null { + React.useEffect(() => { + const preferred = items.find((i) => i.value !== '__new__') ?? 
items[0]; + if (preferred && onHighlight) onHighlight(preferred); + }, [items, onHighlight]); + return null; + }, +})); + +vi.mock('ink-text-input', () => ({ + default: function TextInputMock(): React.ReactElement | null { + return null; + }, +})); + +vi.mock('../services/copilot.js', () => ({ + fetchModels: vi.fn(async () => [{ id: 'gpt-5', label: 'GPT-5' }]), + getModel: vi.fn(() => 'gpt-5'), + setModel: vi.fn(), + getModelLabel: vi.fn(() => 'GPT-5'), +})); + +vi.mock('../services/persistence.js', () => ({ + listPlans: vi.fn(async () => [{ id: 'plan-1', name: 'Plan One', updatedAt: '2026-02-01T00:00:00.000Z' }]), + loadPlan: vi.fn(async () => ({ + id: 'plan-1', + name: 'Plan One', + description: 'Demo', + createdAt: '2026-02-01T00:00:00.000Z', + updatedAt: '2026-02-01T00:00:00.000Z', + tasks: [], + })), + savePlan: vi.fn(async () => undefined), + summarizePlan: vi.fn(async () => '.planeteer/plan-1.md'), +})); + +vi.mock('../services/planner.js', () => ({ + refineWBS: vi.fn(async (tasks: Plan['tasks']) => tasks), + generateWBS: vi.fn(async () => [ + { + id: 'task-a', + title: 'Task A', + description: '', + acceptanceCriteria: [], + dependsOn: [], + status: 'pending', + }, + ]), +})); + +vi.mock('../services/executor.js', () => ({ + executePlan: vi.fn((plan: Plan, callbacks: Record void>) => { + const completedPlan: Plan = { + ...plan, + tasks: plan.tasks.map((t) => ({ ...t, status: 'done' as const })), + }; + callbacks.onTaskStart?.('project-init'); + callbacks.onTaskDone?.('project-init', 'Init complete'); + for (const task of plan.tasks) { + callbacks.onTaskStart?.(task.id); + callbacks.onTaskDone?.(task.id, `Done: ${task.id}`); + } + callbacks.onAllDone?.(completedPlan); + return { retryTask: vi.fn(), done: Promise.resolve(completedPlan) }; + }), +})); + +vi.mock('../services/validator.js', () => ({ + validatePlan: vi.fn((plan: Plan, callbacks: Record void>) => { + for (const task of plan.tasks) { + callbacks.onTaskStart?.(task.id); + 
callbacks.onTaskDone?.(task.id, { + taskId: task.id, + taskTitle: task.title, + status: 'pass', + criteriaResults: task.acceptanceCriteria.map((criterion) => ({ + criterion, + verdict: 'pass', + })), + summary: 'All criteria passed.', + }); + } + callbacks.onAllDone?.({ + planId: plan.id, + planName: plan.name, + generatedAt: '2026-02-01T00:00:00.000Z', + totalCriteria: 1, + overallPass: 1, + overallFail: 0, + overallPartial: 0, + overallUnknown: 0, + taskResults: [{ + taskId: plan.tasks[0]?.id ?? 'task-a', + taskTitle: plan.tasks[0]?.title ?? 'Task A', + status: 'pass', + criteriaResults: [{ + criterion: plan.tasks[0]?.acceptanceCriteria[0] ?? 'criterion', + verdict: 'pass', + }], + summary: 'All criteria passed.', + }], + }); + }), +})); + +const basePlan: Plan = { + id: 'plan-1', + name: 'Plan One', + description: 'Demo', + createdAt: '2026-02-01T00:00:00.000Z', + updatedAt: '2026-02-01T00:00:00.000Z', + tasks: [ + { + id: 'task-a', + title: 'Task A', + description: 'A', + acceptanceCriteria: ['A passes'], + dependsOn: [], + status: 'pending', + }, + { + id: 'task-b', + title: 'Task B', + description: 'B', + acceptanceCriteria: ['B passes'], + dependsOn: ['task-a'], + status: 'pending', + }, + ], +}; + +function assertNoFormattingOrFlicker(frames: string[], width: number): void { + const nonEmptyFrames = frames.filter((f) => f.trim().length > 0); + expect(nonEmptyFrames.length).toBeGreaterThan(0); + + for (const frame of nonEmptyFrames) { + expect(frame).not.toContain('\u001B['); + for (const line of frame.split('\n')) { + expect(line.length).toBeLessThanOrEqual(width + 2); + } + } + + if (nonEmptyFrames.length > 1) { + const consecutiveDuplicates = nonEmptyFrames + .slice(1) + .filter((frame, idx) => frame === nonEmptyFrames[idx]).length; + expect(consecutiveDuplicates).toBeLessThan(nonEmptyFrames.length); + } +} + +describe('CLI integration simulator', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('covers home commands: /x, /v, /r, /z and m', 
async () => { + const onLoadPlan = vi.fn(); + const onExecutePlan = vi.fn(); + const onValidatePlan = vi.fn(); + + await simulateSession( + , + { + width: 100, + steps: [ + { input: 'm' }, + { input: '\u001B' }, + { input: '/' }, + { input: 'x' }, + { input: '/' }, + { input: 'v' }, + { input: '/' }, + { input: 'r' }, + { input: '/' }, + { input: 'z', waitMs: 40 }, + ], + }, + ); + + expect(onExecutePlan).toHaveBeenCalledWith('plan-1'); + expect(onValidatePlan).toHaveBeenCalledWith('plan-1'); + expect(onLoadPlan).toHaveBeenCalledWith('plan-1'); + }); + + it('covers breakdown commands: tab, arrows and enter', async () => { + const onPlanReady = vi.fn(); + await simulateSession( + , + { + steps: [ + { input: '\u001B[B' }, + { input: '\u001B[A' }, + { input: '\t' }, + { input: '\r' }, + ], + }, + ); + + expect(onPlanReady).toHaveBeenCalled(); + }); + + it('covers refine commands: /x /v /s /z and []', async () => { + const onExecute = vi.fn(); + const onValidate = vi.fn(); + const onPlanUpdated = vi.fn(); + + await simulateSession( + , + { + steps: [ + { input: ']' }, + { input: '[' }, + { input: '/' }, + { input: 'x' }, + { input: '/' }, + { input: 'v' }, + { input: '/' }, + { input: 's', waitMs: 40 }, + { input: '/' }, + { input: 'z', waitMs: 40 }, + ], + }, + ); + + expect(onExecute).toHaveBeenCalled(); + expect(onValidate).toHaveBeenCalled(); + expect(onPlanUpdated).toHaveBeenCalled(); + }); + + it('covers execute commands: x, arrows, r and z', async () => { + const onDone = vi.fn(); + const result = await simulateSession( + , + { + width: 100, + steps: [ + { input: 'x', waitMs: 50 }, + { input: '\u001B[C' }, + { input: '\u001B[D' }, + { input: '\u001B[B' }, + { input: '\u001B[A' }, + { input: 'r' }, + { input: 'z', waitMs: 50 }, + ], + }, + ); + + expect(onDone).toHaveBeenCalled(); + assertNoFormattingOrFlicker(result.frames, 100); + }); + + it('covers validate commands: v and arrows', async () => { + const result = await simulateSession( + , + { + width: 100, + 
steps: [ + { input: 'v', waitMs: 40 }, + { input: '\u001B[B' }, + { input: '\u001B[A' }, + ], + }, + ); + + assertNoFormattingOrFlicker(result.frames, 100); + }); +}); diff --git a/src/screens/execute.tsx b/src/screens/execute.tsx index 2302ff1..a3a872e 100644 --- a/src/screens/execute.tsx +++ b/src/screens/execute.tsx @@ -2,9 +2,11 @@ import React, { useState, useEffect, useRef } from 'react'; import { Box, Text, useInput } from 'ink'; import type { Plan, Task } from '../models/plan.js'; import { executePlan } from '../services/executor.js'; -import type { ExecutionOptions, ExecutionHandle } from '../services/executor.js'; +import type { ExecutionOptions, ExecutionHandle, SessionEventWithTask } from '../services/executor.js'; import { savePlan, summarizePlan } from '../services/persistence.js'; import { computeBatches } from '../utils/dependency-graph.js'; +import { detectOrphanedSessions, cleanupOrphanedSessions, markTasksAsInterrupted } from '../services/session-recovery.js'; +import type { OrphanedSessionInfo } from '../services/session-recovery.js'; import Spinner from '../components/spinner.js'; import StatusBar from '../components/status-bar.js'; @@ -20,6 +22,7 @@ const STATUS_ICON: Record = { in_progress: '◉', done: '✓', failed: '✗', + interrupted: '⊗', }; const STATUS_COLOR: Record = { @@ -27,6 +30,7 @@ const STATUS_COLOR: Record = { in_progress: 'yellow', done: 'green', failed: 'red', + interrupted: 'magenta', }; export default function ExecuteScreen({ @@ -46,6 +50,11 @@ export default function ExecuteScreen({ const [runCount, setRunCount] = useState(0); // incremented to re-trigger execution const execHandleRef = useRef(null); const [summarized, setSummarized] = useState(''); + const [sessionEvents, setSessionEvents] = useState([]); + const [taskContexts, setTaskContexts] = useState>({}); + const [orphanedSessions, setOrphanedSessions] = useState([]); + const [recoveryMode, setRecoveryMode] = useState<'pending' | 'cleaning' | 'fresh' | 
'none'>('pending'); + const [recoveryError, setRecoveryError] = useState(''); const { batches } = computeBatches(plan.tasks); // Total display batches: init batch (index 0) + real batches @@ -53,7 +62,7 @@ export default function ExecuteScreen({ useInput((ch, key) => { if (key.escape && !executing) onBack(); - if (ch === 'x' && !started) { + if (ch === 'x' && !started && recoveryMode !== 'pending') { setStarted(true); setExecuting(true); setRunCount((c) => c + 1); @@ -83,18 +92,18 @@ export default function ExecuteScreen({ execHandleRef.current.retryTask(selected.id); } else if (!executing) { // Retry all failed tasks when execution has stopped - const hasFailed = currentPlan.tasks.some((t) => t.status === 'failed'); + const hasFailed = currentPlan.tasks.some((t) => t.status === 'failed' || t.status === 'interrupted'); if (hasFailed) { setCurrentPlan((p) => ({ ...p, tasks: p.tasks.map((t) => - t.status === 'failed' ? { ...t, status: 'pending' as const, agentResult: undefined } : t, + t.status === 'failed' || t.status === 'interrupted' ? { ...t, status: 'pending' as const, agentResult: undefined } : t, ), })); setTaskStreams((prev) => { const next = { ...prev }; for (const t of currentPlan.tasks) { - if (t.status === 'failed') delete next[t.id]; + if (t.status === 'failed' || t.status === 'interrupted') delete next[t.id]; } return next; }); @@ -129,6 +138,58 @@ export default function ExecuteScreen({ } }); + // Detect orphaned sessions on mount + useEffect(() => { + detectOrphanedSessions(currentPlan).then((orphaned) => { + setOrphanedSessions(orphaned); + if (orphaned.length === 0) { + setRecoveryMode('none'); + } + }).catch((err) => { + console.error('Failed to detect orphaned sessions:', err); + setRecoveryError(`Failed to detect sessions: ${err instanceof Error ? 
err.message : String(err)}`); + setRecoveryMode('none'); // Proceed anyway + }); + }, []); + + // Handle recovery mode selection + useInput((ch) => { + if (recoveryMode === 'pending' && orphanedSessions.length > 0) { + if (ch === '1') { + // Mark as interrupted and continue + const updatedPlan = markTasksAsInterrupted(currentPlan, orphanedSessions); + setCurrentPlan(updatedPlan); + savePlan(updatedPlan).catch(() => {}); + setRecoveryMode('fresh'); + } else if (ch === '2') { + // Mark as interrupted and cleanup sessions (recommended) + setRecoveryMode('cleaning'); + cleanupOrphanedSessions(orphanedSessions).then(() => { + const updatedPlan = markTasksAsInterrupted(currentPlan, orphanedSessions); + setCurrentPlan(updatedPlan); + savePlan(updatedPlan).catch(() => {}); + setRecoveryMode('fresh'); + }).catch((err) => { + console.error('Failed to cleanup sessions:', err); + setRecoveryError(`Cleanup failed: ${err instanceof Error ? err.message : String(err)}`); + setRecoveryMode('fresh'); // Proceed anyway + }); + } else if (ch === '3') { + // Cleanup and go back + setRecoveryMode('cleaning'); + cleanupOrphanedSessions(orphanedSessions).then(() => { + const updatedPlan = markTasksAsInterrupted(currentPlan, orphanedSessions); + savePlan(updatedPlan).catch(() => {}); + onBack(); + }).catch((err) => { + console.error('Failed to cleanup sessions:', err); + setRecoveryError(`Cleanup failed: ${err instanceof Error ? 
err.message : String(err)}`); + onBack(); // Go back anyway + }); + } + } + }); + useEffect(() => { if (!started || !executing || runCount === 0) return; @@ -195,6 +256,28 @@ export default function ExecuteScreen({ } // Otherwise stay on execute screen — user can press 'r' to retry }, + onSessionEvent: (eventWithTask) => { + const { event, taskId } = eventWithTask; + + // Only store session events that we actually render / use, and keep history bounded + if (event.type === 'session.start' && event.data.context) { + const { cwd, repository, branch } = event.data.context; + setSessionEvents((prev) => { + const updated = [...prev, eventWithTask]; + return updated.slice(-100); // Keep last 100 events + }); + setTaskContexts((prev) => ({ + ...prev, + [taskId]: { cwd, repository, branch }, + })); + } + }, + onPlanUpdate: (updatedPlan) => { + // Incremental save after each task completes or fails + savePlan(updatedPlan).catch(() => { + // Ignore save errors during execution to avoid breaking the flow + }); + }, }, execOptions); execHandleRef.current = handle; @@ -202,6 +285,7 @@ export default function ExecuteScreen({ const doneCount = currentPlan.tasks.filter((t) => t.status === 'done').length; const failedCount = currentPlan.tasks.filter((t) => t.status === 'failed').length; + const interruptedCount = currentPlan.tasks.filter((t) => t.status === 'interrupted').length; const totalCount = currentPlan.tasks.length; // Build the list of displayable tasks for the current view index @@ -241,8 +325,60 @@ export default function ExecuteScreen({ {doneCount} /{totalCount} done {failedCount > 0 && ({failedCount} failed)} + {interruptedCount > 0 && ({interruptedCount} interrupted)} + {/* Session Recovery UI */} + {recoveryMode === 'cleaning' && ( + + + + + + )} + + {recoveryError && ( + + ⚠ {recoveryError} + + )} + + {recoveryMode === 'pending' && orphanedSessions.length > 0 && ( + + + ⚠ Orphaned Sessions Detected + + + + Found {orphanedSessions.length} interrupted 
task{orphanedSessions.length > 1 ? 's' : ''} with active sessions: + + + {orphanedSessions.map((o, i) => ( + + • {o.task.title} (session: {o.sessionId.slice(0, 8)}...) + + ))} + + Choose an option: + + + + 1 — Mark as interrupted and continue (keeps sessions) + + + + + 2 — Mark as interrupted and cleanup sessions (recommended) + + + + + 3 — Cleanup sessions and go back + + + + )} + {/* Progress bar */} {started && ( @@ -251,7 +387,7 @@ export default function ExecuteScreen({ )} - {!started && ( + {!started && recoveryMode !== 'pending' && ( Press x to start — init files (README.md, .gitignore) then {totalCount} tasks in {batches.length} batches @@ -318,18 +454,29 @@ export default function ExecuteScreen({ const isSelected = i === selectedTaskIndex; const icon = STATUS_ICON[task.status] ?? '?'; const color = STATUS_COLOR[task.status] ?? 'gray'; + const context = taskContexts[task.id]; return ( - - {isSelected ? '❯ ' : ' '} - {icon} - - {task.id} - - — {task.title} - {task.status === 'in_progress' && ( - + + + {isSelected ? 
'❯ ' : ' '} + {icon} + + {task.id} + + — {task.title} + {task.status === 'in_progress' && ( + + )} + {task.status === 'in_progress' && } + + {context?.cwd && ( + + 📁 {context.cwd} + {context.repository && ( + ({context.repository}) + )} + )} - {task.status === 'in_progress' && } ); })} @@ -376,13 +523,39 @@ export default function ExecuteScreen({ )} + {/* Context change events for selected task */} + {started && selectedTask && (() => { + const taskEvents = sessionEvents.filter( + (e) => e.taskId === selectedTask.id && e.event.type === 'session.start' && !!e.event.data.context + ); + if (taskEvents.length === 0) return null; + + return ( + + Context Changes: + {taskEvents.slice(-3).map((e) => { + if (e.event.type !== 'session.start' || !e.event.data.context) return null; + const time = new Date(e.event.timestamp).toLocaleTimeString(); + const { cwd, repository, branch } = e.event.data.context; + return ( + + {time} + → {cwd} + {repository && ({repository}{branch ? `@${branch}` : ''})} + + ); + })} + + ); + })()} + {/* Retry prompt when there are failures */} - {started && !executing && failedCount > 0 && ( + {started && !executing && (failedCount > 0 || interruptedCount > 0) && ( - {failedCount} task{failedCount > 1 ? 's' : ''} failed. + {failedCount + interruptedCount} task{failedCount + interruptedCount > 1 ? 's' : ''} failed or interrupted. - Press r to retry failed tasks. + Press r to retry. )} @@ -408,11 +581,13 @@ export default function ExecuteScreen({ ? '←→: switch batch ↑↓: select task r: retry task ⏳ executing...' : executing ? '←→: switch batch ↑↓: select task ⏳ executing...' - : started && failedCount > 0 + : started && (failedCount > 0 || interruptedCount > 0) ? '←→: switch batch ↑↓: select task r: retry z: summarize esc: back' : started ? '←→: switch batch ↑↓: select task z: summarize esc: back' - : 'x: start esc: back' + : recoveryMode === 'pending' + ? 
'1: keep sessions 2: cleanup sessions 3: cleanup & back' + : 'x: start esc: back' } /> diff --git a/src/screens/refine.tsx b/src/screens/refine.tsx index 40bd325..57aeb71 100644 --- a/src/screens/refine.tsx +++ b/src/screens/refine.tsx @@ -1,9 +1,10 @@ import React, { useState, useCallback } from 'react'; import { Box, Text, useInput } from 'ink'; import TextInput from 'ink-text-input'; -import type { Plan, Task } from '../models/plan.js'; +import type { Plan, Task, SkillConfig } from '../models/plan.js'; import { refineWBS } from '../services/planner.js'; import { savePlan, summarizePlan } from '../services/persistence.js'; +import { getSkillOptions } from '../services/copilot.js'; import { detectCycles, computeBatches } from '../utils/dependency-graph.js'; import TaskTree from '../components/task-tree.js'; import BatchView from '../components/batch-view.js'; @@ -12,7 +13,7 @@ import Spinner from '../components/spinner.js'; import StreamingText from '../components/streaming-text.js'; import StatusBar from '../components/status-bar.js'; -type ViewMode = 'tree' | 'batch'; +type ViewMode = 'tree' | 'batch' | 'skills'; interface RefineScreenProps { plan: Plan; @@ -41,6 +42,19 @@ export default function RefineScreen({ const [editingTask, setEditingTask] = useState(null); const [commandMode, setCommandMode] = useState(false); + const toggleSkill = useCallback( + (skillName: string) => { + const skills = currentPlan.skills || []; + const updatedSkills = skills.map((s) => + s.name === skillName ? { ...s, enabled: !s.enabled } : s + ); + const updated = { ...currentPlan, skills: updatedSkills, updatedAt: new Date().toISOString() }; + setCurrentPlan(updated); + onPlanUpdated(updated); + }, + [currentPlan, onPlanUpdated] + ); + const moveTask = useCallback( (direction: 'up' | 'down') => { const tasks = [...currentPlan.tasks]; @@ -101,15 +115,28 @@ export default function RefineScreen({ } if (key.tab) { - setViewMode((v) => (v === 'tree' ? 
'batch' : 'tree')); + setViewMode((v) => { + if (v === 'tree') return 'batch'; + if (v === 'batch') return 'skills'; + return 'tree'; + }); } else if (key.upArrow) { setSelectedIndex((i) => Math.max(0, i - 1)); } else if (key.downArrow) { - setSelectedIndex((i) => Math.min(currentPlan.tasks.length - 1, i + 1)); + if (viewMode === 'skills') { + const skills = currentPlan.skills || []; + setSelectedIndex((i) => Math.min(skills.length - 1, i + 1)); + } else { + setSelectedIndex((i) => Math.min(currentPlan.tasks.length - 1, i + 1)); + } } else if (ch === '[') { moveTask('up'); } else if (ch === ']') { moveTask('down'); + } else if (ch === ' ' && viewMode === 'skills') { + const skills = currentPlan.skills || []; + const skill = skills[selectedIndex]; + if (skill) toggleSkill(skill.name); } }); @@ -133,9 +160,12 @@ export default function RefineScreen({ setStreamText(''); setInput(''); - refineWBS(currentPlan.tasks, value, (_delta, fullText) => { - setStreamText(fullText); - }) + getSkillOptions() + .then((skillOptions) => + refineWBS(currentPlan.tasks, value, (_delta, fullText) => { + setStreamText(fullText); + }, skillOptions) + ) .then((tasks) => { const updated = { ...currentPlan, tasks, updatedAt: new Date().toISOString() }; setCurrentPlan(updated); @@ -173,6 +203,10 @@ export default function RefineScreen({ 📦 Batches + / + + 🎯 Skills + {cycles.length > 0 && ( @@ -190,8 +224,38 @@ export default function RefineScreen({ <> {viewMode === 'tree' ? ( - ) : ( + ) : viewMode === 'batch' ? ( + ) : ( + + + Active Skills + {(!currentPlan.skills || currentPlan.skills.length === 0) && ( + — no skills configured + )} + + {currentPlan.skills && currentPlan.skills.length > 0 ? ( + currentPlan.skills.map((skill, idx) => ( + + + {idx === selectedIndex ? '❯ ' : ' '} + + + {skill.enabled ? 
'✓' : '○'} {skill.name} + + + )) + ) : ( + + No custom skills found in .github/skills/ + + )} + + + Use ↑↓ to select, [space] to toggle, ⇥ to switch view + + + )} @@ -239,7 +303,10 @@ export default function RefineScreen({ ); diff --git a/src/services/copilot.test.ts b/src/services/copilot.test.ts new file mode 100644 index 0000000..3c29cbd --- /dev/null +++ b/src/services/copilot.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdir, writeFile, rm } from 'node:fs/promises'; +import { join } from 'node:path'; +import { existsSync } from 'node:fs'; +import { + ensureSkillsDirectory, + getSkillsDirectory, + listSkillFiles, + loadSkillConfigs, + getSkillOptions +} from './copilot.js'; + +const TEST_DIR = join(process.cwd(), '.planeteer-test'); +const TEST_SKILLS_DIR = join(TEST_DIR, 'skills'); + +describe('Skill Configuration', () => { + beforeEach(async () => { + // Clean up test directory + if (existsSync(TEST_DIR)) { + await rm(TEST_DIR, { recursive: true, force: true }); + } + }); + + afterEach(async () => { + // Clean up test directory + if (existsSync(TEST_DIR)) { + await rm(TEST_DIR, { recursive: true, force: true }); + } + }); + + describe('ensureSkillsDirectory', () => { + it('should create skills directory if it does not exist', async () => { + await ensureSkillsDirectory(); + const skillsDir = getSkillsDirectory(); + expect(existsSync(skillsDir)).toBe(true); + }); + + it('should not fail if skills directory already exists', async () => { + await ensureSkillsDirectory(); + await ensureSkillsDirectory(); // Should not throw + const skillsDir = getSkillsDirectory(); + expect(existsSync(skillsDir)).toBe(true); + }); + }); + + describe('listSkillFiles', () => { + it('should return empty array when no skill files exist', async () => { + await ensureSkillsDirectory(); + // Note: In test environment, example skills may already exist + const files = await listSkillFiles(); + 
expect(Array.isArray(files)).toBe(true); + // Files should only contain .yaml or .yml extensions + files.forEach(file => { + expect(file.endsWith('.yaml') || file.endsWith('.yml')).toBe(true); + }); + }); + + it('should list YAML skill files', async () => { + await ensureSkillsDirectory(); + const skillsDir = getSkillsDirectory(); + + await writeFile(join(skillsDir, 'skill1.yaml'), 'name: skill1\n'); + await writeFile(join(skillsDir, 'skill2.yml'), 'name: skill2\n'); + await writeFile(join(skillsDir, 'not-a-skill.txt'), 'ignore me\n'); + + const files = await listSkillFiles(); + expect(files).toContain('skill1.yaml'); + expect(files).toContain('skill2.yml'); + expect(files).not.toContain('not-a-skill.txt'); + }); + }); + + describe('loadSkillConfigs', () => { + it('should load skill configurations from existing files', async () => { + await ensureSkillsDirectory(); + const configs = await loadSkillConfigs(); + expect(Array.isArray(configs)).toBe(true); + // Should include the example-web-app-skill.yaml if it exists + if (configs.length > 0) { + expect(configs.every(c => c.enabled === true)).toBe(true); + expect(configs.every(c => typeof c.name === 'string')).toBe(true); + } + }); + + it('should load multiple skill configurations', async () => { + const configs = await loadSkillConfigs(); + expect(Array.isArray(configs)).toBe(true); + // All configs should have name and enabled properties + configs.forEach(config => { + expect(config).toHaveProperty('name'); + expect(config).toHaveProperty('enabled'); + expect(typeof config.name).toBe('string'); + expect(typeof config.enabled).toBe('boolean'); + }); + }); + + it('should gracefully handle malformed skill files', async () => { + const configs = await loadSkillConfigs(); + // Should successfully load at least the valid example skill + expect(Array.isArray(configs)).toBe(true); + }); + }); + + describe('getSkillOptions', () => { + it('should return skillDirectories when skills directory exists', async () => { + await 
ensureSkillsDirectory(); + const options = await getSkillOptions(); + + const skillFiles = await listSkillFiles(); + if (skillFiles.length > 0) { + expect(options).toHaveProperty('skillDirectories'); + expect(Array.isArray(options.skillDirectories)).toBe(true); + } else { + expect(options).toEqual({}); + } + }); + + it('should return skillDirectories path correctly', async () => { + await ensureSkillsDirectory(); + const skillsDir = getSkillsDirectory(); + const options = await getSkillOptions(); + + const skillFiles = await listSkillFiles(); + if (skillFiles.length > 0) { + expect(options.skillDirectories![0]).toBe(skillsDir); + } + }); + }); +}); diff --git a/src/services/copilot.ts b/src/services/copilot.ts index b691e58..b7cddac 100644 --- a/src/services/copilot.ts +++ b/src/services/copilot.ts @@ -1,13 +1,24 @@ import { CopilotClient } from '@github/copilot-sdk'; -import { readFile, writeFile, mkdir } from 'node:fs/promises'; +import { readFile, writeFile, mkdir, readdir } from 'node:fs/promises'; +import type { SessionEvent } from '@github/copilot-sdk'; import { join } from 'node:path'; import { existsSync } from 'node:fs'; -import type { ChatMessage } from '../models/plan.js'; +import type { ChatMessage, SkillConfig } from '../models/plan.js'; + +// Re-export SessionEvent for use in other modules +export type { SessionEvent }; const SETTINGS_PATH = join(process.cwd(), '.planeteer', 'settings.json'); +const SKILLS_DIR = join(process.cwd(), '.github', 'skills'); interface Settings { model?: string; + disabledSkills?: string[]; +} + +export interface SkillOptions { + skillDirectories?: string[]; + disabledSkills?: string[]; } async function loadSettings(): Promise { @@ -26,6 +37,68 @@ async function saveSettings(settings: Settings): Promise { await writeFile(SETTINGS_PATH, JSON.stringify(settings, null, 2), 'utf-8'); } +/** Ensure the skills directory exists */ +export async function ensureSkillsDirectory(): Promise { + if (!existsSync(SKILLS_DIR)) { + await 
mkdir(SKILLS_DIR, { recursive: true }); + } +} + +/** Get the path to the skills directory */ +export function getSkillsDirectory(): string { + return SKILLS_DIR; +} + +/** List all skill files in the skills directory */ +export async function listSkillFiles(): Promise { + try { + if (!existsSync(SKILLS_DIR)) { + return []; + } + const files = await readdir(SKILLS_DIR); + return files.filter(f => f.endsWith('.yaml') || f.endsWith('.yml')); + } catch (err) { + console.error('Error listing skill files:', err); + return []; + } +} + +/** Load skill configurations from the skills directory */ +export async function loadSkillConfigs(): Promise { + const skillFiles = await listSkillFiles(); + const skills: SkillConfig[] = []; + + for (const file of skillFiles) { + try { + const content = await readFile(join(SKILLS_DIR, file), 'utf-8'); + // Parse YAML to extract skill name - simple parsing for name field + const nameMatch = content.match(/^name:\s*(.+)$/m); + if (nameMatch && nameMatch[1]) { + const name = nameMatch[1].trim(); + skills.push({ name, enabled: true }); + } + } catch (err) { + console.error(`Error loading skill file ${file}:`, err); + // Graceful degradation - skip malformed files + } + } + + return skills; +} + +/** Get skill options for Copilot SDK */ +export async function getSkillOptions(): Promise { + const skillFiles = await listSkillFiles(); + + if (skillFiles.length === 0) { + return {}; + } + + return { + skillDirectories: [SKILLS_DIR], + }; +} + export interface ModelEntry { id: string; label: string; @@ -108,12 +181,15 @@ export interface StreamCallbacks { onDelta: (text: string) => void; onDone: (fullText: string) => void; onError: (error: Error) => void; + onSessionEvent?: (event: SessionEvent) => void; + onSessionStart?: (sessionId: string) => void; } export async function sendPrompt( systemPrompt: string, messages: ChatMessage[], callbacks: StreamCallbacks, + skillOptions?: SkillOptions, ): Promise { let copilot: CopilotClient; try { @@ 
-125,18 +201,49 @@ export async function sendPrompt( let session; try { - session = await copilot.createSession({ + interface SessionConfigWithSkills { + model: string; + streaming: boolean; + skillDirectories?: string[]; + disabledSkills?: string[]; + } + + const sessionConfig: SessionConfigWithSkills = { model: currentModel, streaming: true, - }); + }; + + if (skillOptions?.skillDirectories && skillOptions.skillDirectories.length > 0) { + sessionConfig.skillDirectories = skillOptions.skillDirectories; + } + + if (skillOptions?.disabledSkills && skillOptions.disabledSkills.length > 0) { + sessionConfig.disabledSkills = skillOptions.disabledSkills; + } + + session = await copilot.createSession(sessionConfig); } catch (err) { callbacks.onError(new Error(`Failed to create session: ${(err as Error).message}`)); return; } + // Notify caller of the session ID for persistence tracking + callbacks.onSessionStart?.(session.sessionId); + let fullText = ''; let settled = false; + // Listen for session events if callback provided, but avoid forwarding + // high-volume delta events that are already handled by onDelta. + if (callbacks.onSessionEvent) { + session.on((event: SessionEvent) => { + if (event.type === 'assistant.message_delta') { + return; + } + callbacks.onSessionEvent?.(event); + }); + } + session.on('assistant.message_delta', (event: { data: { deltaContent: string } }) => { fullText += event.data.deltaContent; callbacks.onDelta(event.data.deltaContent); @@ -176,10 +283,19 @@ export async function sendPrompt( export async function sendPromptSync( systemPrompt: string, messages: ChatMessage[], - options?: { timeoutMs?: number; onDelta?: (delta: string, fullText: string) => void }, + options?: { + timeoutMs?: number; + onDelta?: (delta: string, fullText: string) => void; + onSessionEvent?: (event: SessionEvent) => void; + onSessionStart?: (sessionId: string) => void; + skillOptions?: SkillOptions; + }, ): Promise { const idleTimeoutMs = options?.timeoutMs ?? 
120_000; const onDelta = options?.onDelta; + const onSessionEvent = options?.onSessionEvent; + const onSessionStart = options?.onSessionStart; + const skillOptions = options?.skillOptions; return new Promise((resolve, reject) => { let settled = false; @@ -235,6 +351,26 @@ export async function sendPromptSync( reject(err); } }, - }); + onSessionEvent, + onSessionStart, + }, skillOptions); }); } + +/** List all available Copilot sessions */ +export async function listSessions(): Promise> { + const c = await getClient(); + return c.listSessions(); +} + +/** Delete a Copilot session by ID */ +export async function deleteSession(sessionId: string): Promise { + const c = await getClient(); + await c.deleteSession(sessionId); +} + +/** Resume an existing Copilot session by ID */ +export async function resumeSession(sessionId: string): Promise { + const c = await getClient(); + return c.resumeSession(sessionId); +} diff --git a/src/services/executor.test.ts b/src/services/executor.test.ts new file mode 100644 index 0000000..d154c82 --- /dev/null +++ b/src/services/executor.test.ts @@ -0,0 +1,116 @@ +import { describe, it, expect, vi } from 'vitest'; +import type { ExecutionCallbacks, SessionEventWithTask } from './executor.js'; +import type { SessionEvent } from './copilot.js'; + +describe('SessionEventWithTask type', () => { + it('should correctly structure context change events with task ID', () => { + const mockEvent: SessionEvent = { + id: 'evt-123', + timestamp: new Date().toISOString(), + parentId: null, + type: 'session.context_changed', + data: { + cwd: '/home/user/project', + gitRoot: '/home/user/project', + repository: 'owner/repo', + branch: 'main', + }, + }; + + const eventWithTask: SessionEventWithTask = { + taskId: 'task-1', + event: mockEvent, + }; + + expect(eventWithTask.taskId).toBe('task-1'); + expect(eventWithTask.event.type).toBe('session.context_changed'); + if (eventWithTask.event.type === 'session.context_changed') { + 
expect(eventWithTask.event.data.cwd).toBe('/home/user/project'); + expect(eventWithTask.event.data.repository).toBe('owner/repo'); + expect(eventWithTask.event.data.branch).toBe('main'); + } + }); + + it('should handle session.start events with context', () => { + const mockEvent: SessionEvent = { + id: 'evt-456', + timestamp: new Date().toISOString(), + parentId: null, + type: 'session.start', + data: { + sessionId: 'sess-123', + version: 1, + producer: 'test', + copilotVersion: '0.1.24', + startTime: new Date().toISOString(), + context: { + cwd: '/workspace', + gitRoot: '/workspace', + repository: 'test/repo', + branch: 'feature', + }, + }, + }; + + const eventWithTask: SessionEventWithTask = { + taskId: 'init-task', + event: mockEvent, + }; + + expect(eventWithTask.taskId).toBe('init-task'); + expect(eventWithTask.event.type).toBe('session.start'); + if (eventWithTask.event.type === 'session.start' && eventWithTask.event.data.context) { + expect(eventWithTask.event.data.context.cwd).toBe('/workspace'); + expect(eventWithTask.event.data.context.repository).toBe('test/repo'); + } + }); +}); + +describe('ExecutionCallbacks with session events', () => { + it('should define onSessionEvent callback as optional', () => { + const callbacks: ExecutionCallbacks = { + onTaskStart: vi.fn(), + onTaskDelta: vi.fn(), + onTaskDone: vi.fn(), + onTaskFailed: vi.fn(), + onBatchComplete: vi.fn(), + onAllDone: vi.fn(), + // onSessionEvent is optional + }; + + expect(callbacks.onSessionEvent).toBeUndefined(); + }); + + it('should accept onSessionEvent callback', () => { + const sessionEventHandler = vi.fn(); + const callbacks: ExecutionCallbacks = { + onTaskStart: vi.fn(), + onTaskDelta: vi.fn(), + onTaskDone: vi.fn(), + onTaskFailed: vi.fn(), + onBatchComplete: vi.fn(), + onAllDone: vi.fn(), + onSessionEvent: sessionEventHandler, + }; + + expect(callbacks.onSessionEvent).toBeDefined(); + expect(typeof callbacks.onSessionEvent).toBe('function'); + + // Test that it can be called with 
the correct structure + const mockEvent: SessionEvent = { + id: 'evt-789', + timestamp: new Date().toISOString(), + parentId: null, + type: 'session.context_changed', + data: { + cwd: '/test', + }, + }; + + callbacks.onSessionEvent?.({ taskId: 'test-task', event: mockEvent }); + expect(sessionEventHandler).toHaveBeenCalledWith({ + taskId: 'test-task', + event: mockEvent, + }); + }); +}); diff --git a/src/services/executor.ts b/src/services/executor.ts index 1edf492..7573d1d 100644 --- a/src/services/executor.ts +++ b/src/services/executor.ts @@ -1,7 +1,13 @@ import type { Plan, Task } from '../models/plan.js'; import { sendPromptSync } from './copilot.js'; +import type { SessionEvent } from './copilot.js'; import { getReadyTasks } from '../utils/dependency-graph.js'; +export interface SessionEventWithTask { + taskId: string; + event: SessionEvent; +} + export interface ExecutionCallbacks { onTaskStart: (taskId: string) => void; onTaskDelta: (taskId: string, delta: string, fullText: string) => void; @@ -9,6 +15,8 @@ export interface ExecutionCallbacks { onTaskFailed: (taskId: string, error: string) => void; onBatchComplete: (batchIndex: number) => void; onAllDone: (plan: Plan) => void; + onSessionEvent?: (eventWithTask: SessionEventWithTask) => void; + onPlanUpdate?: (plan: Plan) => void; } function buildTaskPrompt(task: Task, plan: Plan, codebaseContext?: string): string { @@ -112,14 +120,22 @@ export function executePlan( onDelta: (delta, fullText) => { callbacks.onTaskDelta(task.id, delta, fullText); }, + onSessionEvent: (event) => { + callbacks.onSessionEvent?.({ taskId: task.id, event }); + }, + onSessionStart: (sessionId) => { + taskInPlan.sessionId = sessionId; + }, }); taskInPlan.status = 'done'; taskInPlan.agentResult = result; callbacks.onTaskDone(task.id, result); + callbacks.onPlanUpdate?.(updatedPlan); } catch (err) { taskInPlan.status = 'failed'; taskInPlan.agentResult = err instanceof Error ? 
err.message : String(err); callbacks.onTaskFailed(task.id, taskInPlan.agentResult!); + callbacks.onPlanUpdate?.(updatedPlan); } } @@ -136,6 +152,7 @@ export function executePlan( if (taskInPlan && taskInPlan.status === 'failed') { taskInPlan.status = 'pending'; taskInPlan.agentResult = undefined; + taskInPlan.sessionId = undefined; } } @@ -180,6 +197,9 @@ export function executePlan( onDelta: (delta, fullText) => { callbacks.onTaskDelta(INIT_TASK_ID, delta, fullText); }, + onSessionEvent: (event) => { + callbacks.onSessionEvent?.({ taskId: INIT_TASK_ID, event }); + }, }); callbacks.onTaskDone(INIT_TASK_ID, initResult); } catch (err) { diff --git a/src/services/planner.ts b/src/services/planner.ts index ef6bc6e..1804bed 100644 --- a/src/services/planner.ts +++ b/src/services/planner.ts @@ -1,5 +1,5 @@ -import type { ChatMessage, Task } from '../models/plan.js'; -import { sendPrompt, sendPromptSync, type StreamCallbacks } from './copilot.js'; +import type { ChatMessage, Task, SkillConfig } from '../models/plan.js'; +import { sendPrompt, sendPromptSync, type StreamCallbacks, type SkillOptions } from './copilot.js'; const CLARIFY_SYSTEM_PROMPT = `You are an expert project planner helping a user clarify the scope of their project. Ask focused clarifying questions to understand: @@ -100,11 +100,12 @@ export async function streamClarification( messages: ChatMessage[], callbacks: StreamCallbacks, codebaseContext?: string, + skillOptions?: SkillOptions, ): Promise { const systemPrompt = codebaseContext ? `${CLARIFY_SYSTEM_PROMPT}\n\n${codebaseContext}` : CLARIFY_SYSTEM_PROMPT; - return sendPrompt(systemPrompt, messages, callbacks); + return sendPrompt(systemPrompt, messages, callbacks, skillOptions); } /** Extract a JSON array from a response that may contain surrounding prose. 
*/ @@ -131,18 +132,28 @@ export async function generateWBS( onDelta?: (delta: string, fullText: string) => void, maxRetries = 2, codebaseContext?: string, + skillOptions?: SkillOptions, + activeSkills?: SkillConfig[], ): Promise { let lastError: Error | null = null; - const userContent = codebaseContext + let userContent = codebaseContext ? `${scopeDescription}\n\n${codebaseContext}` : scopeDescription; + // Add skill context if active skills are provided + if (activeSkills && activeSkills.length > 0) { + const enabledSkills = activeSkills.filter((s) => s.enabled).map((s) => s.name); + if (enabledSkills.length > 0) { + userContent += `\n\nActive custom skills: ${enabledSkills.join(', ')}`; + } + } + for (let attempt = 0; attempt <= maxRetries; attempt++) { try { const result = await sendPromptSync(WBS_SYSTEM_PROMPT, [ { role: 'user', content: userContent }, - ], { onDelta }); + ], { onDelta, skillOptions }); const jsonStr = extractJsonArray(result); if (!jsonStr.startsWith('[')) { @@ -167,13 +178,14 @@ export async function refineWBS( currentTasks: Task[], refinementRequest: string, onDelta?: (delta: string, fullText: string) => void, + skillOptions?: SkillOptions, ): Promise { const result = await sendPromptSync(REFINE_SYSTEM_PROMPT, [ { role: 'user', content: `Current tasks:\n${JSON.stringify(currentTasks, null, 2)}\n\nRefinement request: ${refinementRequest}`, }, - ], { onDelta }); + ], { onDelta, skillOptions }); const jsonStr = extractJsonArray(result); const tasks = JSON.parse(jsonStr) as Task[]; diff --git a/src/services/session-recovery.test.ts b/src/services/session-recovery.test.ts new file mode 100644 index 0000000..4d858af --- /dev/null +++ b/src/services/session-recovery.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect, beforeEach } from 'vitest'; +import type { Plan, Task } from '../models/plan.js'; +import { markTasksAsInterrupted } from '../services/session-recovery.js'; +import type { OrphanedSessionInfo } from 
'../services/session-recovery.js'; + +describe('session-recovery', () => { + let samplePlan: Plan; + let sampleTask1: Task; + let sampleTask2: Task; + let sampleTask3: Task; + + beforeEach(() => { + sampleTask1 = { + id: 'task-1', + title: 'Task 1', + description: 'First task', + acceptanceCriteria: ['Criterion 1'], + dependsOn: [], + status: 'in_progress', + sessionId: 'session-123', + }; + + sampleTask2 = { + id: 'task-2', + title: 'Task 2', + description: 'Second task', + acceptanceCriteria: ['Criterion 2'], + dependsOn: ['task-1'], + status: 'pending', + }; + + sampleTask3 = { + id: 'task-3', + title: 'Task 3', + description: 'Third task', + acceptanceCriteria: ['Criterion 3'], + dependsOn: [], + status: 'done', + agentResult: 'Completed successfully', + }; + + samplePlan = { + id: 'plan-1', + name: 'Test Plan', + description: 'A test plan', + createdAt: '2024-01-01T00:00:00.000Z', + updatedAt: '2024-01-01T00:00:00.000Z', + tasks: [sampleTask1, sampleTask2, sampleTask3], + }; + }); + + describe('markTasksAsInterrupted', () => { + it('should mark specified tasks as interrupted', () => { + const orphanedSessions: OrphanedSessionInfo[] = [ + { + taskId: 'task-1', + sessionId: 'session-123', + task: sampleTask1, + sessionModifiedTime: new Date(), + }, + ]; + + const result = markTasksAsInterrupted(samplePlan, orphanedSessions); + + expect(result.tasks[0].status).toBe('interrupted'); + expect(result.tasks[1].status).toBe('pending'); + expect(result.tasks[2].status).toBe('done'); + }); + + it('should not modify tasks that are not orphaned', () => { + const orphanedSessions: OrphanedSessionInfo[] = []; + + const result = markTasksAsInterrupted(samplePlan, orphanedSessions); + + expect(result.tasks[0].status).toBe('in_progress'); + expect(result.tasks[1].status).toBe('pending'); + expect(result.tasks[2].status).toBe('done'); + }); + + it('should handle multiple orphaned sessions', () => { + const task4: Task = { + id: 'task-4', + title: 'Task 4', + description: 'Fourth 
task', + acceptanceCriteria: [], + dependsOn: [], + status: 'in_progress', + sessionId: 'session-456', + }; + + samplePlan.tasks.push(task4); + + const orphanedSessions: OrphanedSessionInfo[] = [ + { + taskId: 'task-1', + sessionId: 'session-123', + task: sampleTask1, + sessionModifiedTime: new Date(), + }, + { + taskId: 'task-4', + sessionId: 'session-456', + task: task4, + sessionModifiedTime: new Date(), + }, + ]; + + const result = markTasksAsInterrupted(samplePlan, orphanedSessions); + + expect(result.tasks[0].status).toBe('interrupted'); + expect(result.tasks[1].status).toBe('pending'); + expect(result.tasks[2].status).toBe('done'); + expect(result.tasks[3].status).toBe('interrupted'); + }); + + it('should return a new plan object without mutating the original', () => { + const orphanedSessions: OrphanedSessionInfo[] = [ + { + taskId: 'task-1', + sessionId: 'session-123', + task: sampleTask1, + sessionModifiedTime: new Date(), + }, + ]; + + const result = markTasksAsInterrupted(samplePlan, orphanedSessions); + + expect(result).not.toBe(samplePlan); + expect(result.tasks).not.toBe(samplePlan.tasks); + expect(samplePlan.tasks[0].status).toBe('in_progress'); // Original unchanged + expect(result.tasks[0].status).toBe('interrupted'); // Result changed + }); + }); +}); diff --git a/src/services/session-recovery.ts b/src/services/session-recovery.ts new file mode 100644 index 0000000..2c2bd19 --- /dev/null +++ b/src/services/session-recovery.ts @@ -0,0 +1,79 @@ +import type { Plan, Task } from '../models/plan.js'; +import { listSessions, deleteSession } from './copilot.js'; + +export interface OrphanedSessionInfo { + taskId: string; + sessionId: string; + task: Task; + sessionModifiedTime: Date; +} + +/** + * Detect tasks with sessionIds that may have orphaned sessions. + * Returns tasks that are marked as in_progress with session IDs. 
+ */ +export async function detectOrphanedSessions(plan: Plan): Promise { + // Find tasks that were in progress (interrupted) + const tasksWithSessions = plan.tasks.filter( + (t) => t.status === 'in_progress' && t.sessionId + ); + + if (tasksWithSessions.length === 0) { + return []; + } + + try { + // Get all available sessions from the SDK + const allSessions = await listSessions(); + const sessionMap = new Map(allSessions.map((s) => [s.sessionId, s])); + + // Find orphaned sessions + const orphaned: OrphanedSessionInfo[] = []; + for (const task of tasksWithSessions) { + if (!task.sessionId) continue; + + const session = sessionMap.get(task.sessionId); + if (session) { + orphaned.push({ + taskId: task.id, + sessionId: task.sessionId, + task, + sessionModifiedTime: session.modifiedTime, + }); + } + } + + return orphaned; + } catch (err) { + // If we can't list sessions, assume no orphaned sessions + console.error('Failed to list sessions:', err); + return []; + } +} + +/** + * Cleanup orphaned sessions by deleting them from the SDK. + */ +export async function cleanupOrphanedSessions(orphanedSessions: OrphanedSessionInfo[]): Promise { + for (const orphaned of orphanedSessions) { + try { + await deleteSession(orphaned.sessionId); + } catch (err) { + console.error(`Failed to delete session ${orphaned.sessionId}:`, err); + } + } +} + +/** + * Mark interrupted tasks as interrupted status. + */ +export function markTasksAsInterrupted(plan: Plan, orphanedSessions: OrphanedSessionInfo[]): Plan { + const taskIdsToMark = new Set(orphanedSessions.map((o) => o.taskId)); + + return { + ...plan, + tasks: plan.tasks.map((t) => + taskIdsToMark.has(t.id) ? 
{ ...t, status: 'interrupted' as const } : t + ), + }; +} diff --git a/src/services/simulator.test.tsx b/src/services/simulator.test.tsx new file mode 100644 index 0000000..1eed9bb --- /dev/null +++ b/src/services/simulator.test.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import { Box, Text, useInput } from 'ink'; +import { describe, expect, it } from 'vitest'; +import { simulateSession } from './simulator.js'; + +function EchoApp(): React.ReactElement { + const [value, setValue] = React.useState('ready'); + useInput((ch) => { + if (ch) { + setValue((prev) => `${prev}${ch}`); + } + }); + return ( + + {value} + + ); +} + +describe('simulateSession', () => { + it('replays key input and captures frames', async () => { + const result = await simulateSession(, { + width: 80, + steps: [ + { input: 'a' }, + { input: 'b' }, + ], + }); + + expect(result.rawFrames.length).toBeGreaterThan(0); + expect(result.frames.some((f) => f.includes('readya'))).toBe(true); + expect(result.frames.some((f) => f.includes('readyab'))).toBe(true); + }); +}); diff --git a/src/services/simulator.ts b/src/services/simulator.ts new file mode 100644 index 0000000..78db991 --- /dev/null +++ b/src/services/simulator.ts @@ -0,0 +1,131 @@ +import React from 'react'; +import { PassThrough, Writable } from 'node:stream'; +import { render } from 'ink'; + +export interface SimulationStep { + input: string; + waitMs?: number; +} + +export interface SimulationOptions { + steps: SimulationStep[]; + width?: number; + height?: number; + settleMs?: number; +} + +export interface SimulationResult { + rawFrames: string[]; + frames: string[]; +} + +class SimulatedStdin extends PassThrough { + public isTTY = true; + public isRaw = false; + + setRawMode(mode: boolean): this { + this.isRaw = mode; + return this; + } + + ref(): this { + return this; + } + + unref(): this { + return this; + } +} + +class SimulatedStdout extends Writable { + public readonly rawFrames: string[] = []; + public readonly columns: 
number; + public readonly rows: number; + public readonly isTTY = true; + + constructor(columns: number, rows: number) { + super(); + this.columns = columns; + this.rows = rows; + } + + _write(chunk: string | Buffer, _encoding: BufferEncoding, callback: (error?: Error | null) => void): void { + this.rawFrames.push(typeof chunk === 'string' ? chunk : chunk.toString('utf8')); + callback(); + } + + getColorDepth(): number { + return 8; + } + + hasColors(): boolean { + return true; + } + + cursorTo(): void {} + + moveCursor(): void {} + + clearLine(): void {} + + clearScreenDown(): void {} + + getWindowSize(): [number, number] { + return [this.columns, this.rows]; + } + + ref(): this { + return this; + } + + unref(): this { + return this; + } +} + +const ANSI_PATTERN = /\u001B\[[0-?]*[ -/]*[@-~]/g; + +function stripAnsi(value: string): string { + return value.replace(ANSI_PATTERN, ''); +} + +function wait(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export async function simulateSession( + node: React.ReactElement, + { + steps, + width = 120, + height = 40, + settleMs = 20, + }: SimulationOptions, +): Promise { + const stdin = new SimulatedStdin(); + const stdout = new SimulatedStdout(width, height); + + const app = render(node, { + stdin: stdin as unknown as NodeJS.ReadStream, + stdout: stdout as unknown as NodeJS.WriteStream, + stderr: stdout as unknown as NodeJS.WriteStream, + debug: true, + patchConsole: false, + exitOnCtrlC: false, + }); + + await wait(settleMs); + for (const step of steps) { + stdin.write(step.input); + await wait(step.waitMs ?? settleMs); + } + + app.unmount(); + stdin.end(); + await wait(settleMs); + + return { + rawFrames: stdout.rawFrames, + frames: stdout.rawFrames.map(stripAnsi), + }; +}