diff --git a/.claude/agents/code-quality-reviewer.md b/.claude/agents/code-quality-reviewer.md new file mode 100644 index 0000000..c21994e --- /dev/null +++ b/.claude/agents/code-quality-reviewer.md @@ -0,0 +1,132 @@ +--- +name: code-quality-reviewer +description: "Use this agent when the user wants to improve code quality, review recently written code for issues, refactor existing code, or get suggestions for better patterns and practices. This includes requests to review a file, clean up code, improve readability, reduce complexity, fix code smells, or apply best practices.\\n\\nExamples:\\n\\n- Example 1:\\n user: \"I just finished implementing the new integration class, can you review it?\"\\n assistant: \"Let me use the code-quality-reviewer agent to analyze your new integration class for quality improvements.\"\\n (The assistant launches the code-quality-reviewer agent via the Task tool to review the recently written integration class.)\\n\\n- Example 2:\\n user: \"This function feels messy, can you help clean it up?\"\\n assistant: \"I'll use the code-quality-reviewer agent to analyze the function and suggest improvements.\"\\n (The assistant launches the code-quality-reviewer agent via the Task tool to review and suggest refactoring for the messy function.)\\n\\n- Example 3:\\n user: \"Can you check my recent changes for any issues?\"\\n assistant: \"I'll launch the code-quality-reviewer agent to review your recent changes for potential issues and improvements.\"\\n (The assistant launches the code-quality-reviewer agent via the Task tool to review the recent diff or modified files.)\\n\\n- Example 4 (proactive usage):\\n Context: The user has just written a substantial block of new code.\\n user: \"Okay, I think that feature is done.\"\\n assistant: \"Great! Now let me use the code-quality-reviewer agent to review the code you just wrote and make sure it's solid before we move on.\"\\n (The assistant proactively launches the code-quality-reviewer agent via the Task tool to review the newly written code.)" +model: opus +color: green +memory: project +--- + +You are an elite code quality engineer with deep expertise in software craftsmanship, clean code principles, design patterns, and language-specific best practices. You have decades of experience reviewing production codebases across multiple languages and frameworks, with a particular strength in TypeScript/JavaScript ecosystems. You approach code review with a constructive, educational mindset—your goal is not just to identify issues but to help developers understand *why* something should change and *how* to make it better. + +## Core Responsibilities + +You review recently written or modified code and provide actionable, prioritized feedback to improve its quality. You focus on code that was recently changed or written, not the entire codebase, unless explicitly asked otherwise. + +## Review Methodology + +When reviewing code, systematically evaluate these dimensions in order of importance: + +### 1. Correctness & Bugs +- Logic errors, off-by-one errors, race conditions +- Null/undefined handling and edge cases +- Error handling completeness (are errors caught, logged, and handled appropriately?) +- Type safety issues (especially in TypeScript: `any` abuse, missing type guards, unsafe casts) + +### 2. Security +- Input validation and sanitization +- Secrets or credentials in code +- Injection vulnerabilities (SQL, command, path traversal) +- Unsafe deserialization or eval usage + +### 3. 
Architecture & Design +- Single Responsibility Principle violations +- Inappropriate coupling between modules +- Missing abstractions or over-abstraction +- Consistency with existing codebase patterns +- Proper separation of concerns + +### 4. Readability & Maintainability +- Naming clarity (variables, functions, classes, files) +- Function length and complexity (cyclomatic complexity) +- Code duplication (DRY violations) +- Comment quality (missing where needed, excessive where code should be self-documenting) +- Consistent formatting and style + +### 5. Performance +- Unnecessary computations or allocations +- N+1 query patterns or inefficient data access +- Memory leaks (event listeners, subscriptions, closures) +- Algorithmic complexity concerns + +### 6. Testing & Testability +- Is the code structured to be testable? +- Are there missing test cases for the logic? +- Are edge cases covered? + +## Output Format + +Structure your review as follows: + +**Summary**: A 1-3 sentence overview of the code's overall quality and the most important finding. + +**Critical Issues** (must fix): +- Each issue with: location, description, why it matters, and a concrete fix + +**Improvements** (should fix): +- Each suggestion with: location, current state, proposed improvement, and rationale + +**Minor Suggestions** (nice to have): +- Style, naming, or minor readability tweaks + +**What's Done Well**: +- Highlight genuinely good patterns to reinforce positive practices + +## Review Principles + +1. **Be specific**: Always reference exact lines, functions, or patterns. Never give vague feedback like "improve error handling" without saying exactly where and how. +2. **Provide fixes, not just complaints**: Every issue should include a concrete code suggestion or clear description of the fix. +3. **Prioritize ruthlessly**: A review with 3 critical findings is more valuable than one with 30 nitpicks. Lead with what matters most. +4. **Respect existing patterns**: If the codebase has established conventions, suggest improvements that align with them rather than introducing entirely new patterns. +5. **Be constructive**: Frame feedback as improvements, not criticisms. Use "Consider..." or "This could be improved by..." rather than "This is wrong." +6. **Context matters**: Consider the purpose of the code. A quick prototype has different quality standards than a production API endpoint. + +## Project-Specific Guidelines + +When working in projects with specific coding standards (from CLAUDE.md or similar configuration): +- Always check for and respect project-specific linting rules, formatting standards, and architectural patterns +- Verify import styles match project conventions (e.g., ESM imports with `.js` extensions in TypeScript projects) +- Check that the correct package manager is used in any scripts or commands +- Ensure changes align with the project's stated priorities and patterns + +## Self-Verification + +Before delivering your review: +1. Re-read each finding—is it actionable and specific? +2. Verify your suggested fixes are syntactically correct +3. Check that you haven't contradicted yourself +4. Ensure your priority ordering is correct (critical issues first) +5. Confirm you've looked at the actual changed/new code, not unrelated files + +## Edge Cases + +- If the code is too short or trivial for meaningful review, acknowledge this and focus on any improvements that would still add value. 
+- If you need more context (e.g., related files, the purpose of the code, or the broader architecture), ask for it before proceeding with assumptions. +- If the code is generally excellent, say so clearly and focus your review on minor polish items. + +**Update your agent memory** as you discover code patterns, style conventions, common issues, architectural decisions, and recurring quality concerns in this codebase. This builds up institutional knowledge across conversations. Write concise notes about what you found and where. + +Examples of what to record: +- Recurring code smells or anti-patterns you've seen across reviews +- Project-specific conventions and style preferences +- Architectural patterns and module relationships +- Common error handling approaches used in the codebase +- Testing patterns and coverage gaps you've identified + +# Persistent Agent Memory + +You have a Persistent Agent Memory directory at `/Users/ruben/learn/ralph-starter/.claude/agent-memory/code-quality-reviewer/`. Its contents persist across conversations. + +As you work, consult your memory files to build on previous experience. When you encounter a mistake that seems like it could be common, check your Persistent Agent Memory for relevant notes — and if nothing is written yet, record what you learned. + +Guidelines: +- `MEMORY.md` is always loaded into your system prompt — lines beyond 200 will be truncated, so keep it concise +- Create separate topic files (e.g., `debugging.md`, `patterns.md`) for detailed notes and link to them from MEMORY.md +- Record insights about problem constraints, strategies that worked or failed, and lessons learned +- Update or remove memories that turn out to be wrong or outdated +- Organize memory semantically by topic, not chronologically +- Use the Write and Edit tools to update your memory files +- Since this memory is project-scoped and shared with your team via version control, tailor your memories to this project + +## MEMORY.md + +Your MEMORY.md is currently empty. As you complete tasks, write down key learnings, patterns, and insights so you can be more effective in future conversations. Anything saved in MEMORY.md will be included in your system prompt next time. 
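+A minimal sketch of what an early MEMORY.md might look like under these guidelines (the topics, findings, and linked filenames are illustrative placeholders, not a required structure):
+
+```markdown
+# code-quality-reviewer memory
+
+## Conventions
+- ESM imports use `.js` extensions; suggested commands should match the project's package manager
+
+## Recurring Issues
+- Async handlers sometimes swallow errors silently (details in [debugging.md](debugging.md))
+
+## Architecture
+- Integration modules share a common adapter shape (notes in [patterns.md](patterns.md))
+```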
diff --git a/README.md b/README.md index fa19b73..3852a9c 100644 --- a/README.md +++ b/README.md @@ -451,6 +451,7 @@ This creates: |---------|-------------| | `ralph-starter` | Launch interactive wizard | | `ralph-starter run [task]` | Run an autonomous coding loop | +| `ralph-starter fix [task]` | Fix build errors, lint issues, or design problems | | `ralph-starter auto` | Batch-process issues from GitHub/Linear | | `ralph-starter integrations <action>` | Manage integrations (list, help, test, fetch) | | `ralph-starter plan` | Create implementation plan from specs | @@ -561,6 +562,31 @@ ralph-starter run --circuit-breaker-failures 2 "build Y" | `--output-dir <dir>` | Directory to run task in (skips prompt) | | `--prd <file>` | Read tasks from markdown | +## Options for `fix` + +| Flag | Description | +|------|-------------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | +| `--agent <agent>` | Specify agent to use (default: auto-detect) | +| `--commit` | Auto-commit the fix | +| `--max-iterations <n>` | Max fix iterations (default: 3) | +| `--output-dir <dir>` | Project directory (default: cwd) | + +```bash +# Fix build/lint errors automatically +ralph-starter fix + +# Fix a specific design/visual issue +ralph-starter fix "fix the paddings and make the colors brighter" + +# Full scan with auto-commit +ralph-starter fix --scan --commit +``` + +For design-related tasks (CSS, colors, spacing, etc.), the fix command automatically: +- Detects and applies installed design skills +- Instructs the agent to visually verify changes via browser screenshots + ## Config Commands ```bash diff --git a/docs/docs/cli/fix.md b/docs/docs/cli/fix.md new file mode 100644 index 0000000..21cec2a --- /dev/null +++ b/docs/docs/cli/fix.md @@ -0,0 +1,167 @@ +--- +sidebar_position: 2 +title: fix +description: Fix build errors, lint issues, or design problems +keywords: [cli, fix, command, build errors, lint, design, visual, css] +--- + +# ralph-starter fix + +Fix build errors, lint issues, or design problems. + +## Synopsis + +```bash +ralph-starter fix [task] [options] +``` + +## Description + +The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. + +When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: + +- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) +- Instructs the agent to visually verify changes using browser screenshots + +For structured visual fix passes, use the `--design` flag — see [Design Mode](#design-mode) below. + +## Arguments + +| Argument | Description | +|----------|-------------| +| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. 
| + +## Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--design` | Structured visual fix mode with screenshot verification | false | +| `--agent <agent>` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | +| `--commit` | Auto-commit the fix | false | +| `--max-iterations <n>` | Maximum fix iterations | 3 (scan), 4 (design keywords), 7 (--design) | +| `--output-dir <dir>` | Project directory | cwd | + +## Examples + +### Fix Build Errors + +```bash +# Auto-detect and fix build/lint errors +ralph-starter fix + +# Force full project scan +ralph-starter fix --scan +``` + +### Fix Design Issues + +```bash +# Structured visual fix pass (recommended for design work) +ralph-starter fix --design + +# Design mode with specific notes +ralph-starter fix --design "the hero section spacing is off and colors are too muted" + +# Ad-hoc CSS/design fix (auto-detected as design task) +ralph-starter fix "fix the paddings and make the colors brighter" + +# Fix responsive layout +ralph-starter fix "make the layout responsive on mobile" +``` + +### With Options + +```bash +# Auto-commit the fix +ralph-starter fix --scan --commit + +# Use a specific agent +ralph-starter fix "fix lint errors" --agent claude-code + +# Allow more iterations for complex fixes +ralph-starter fix "fix all test failures" --max-iterations 5 + +# Design fix with more room to iterate +ralph-starter fix --design --max-iterations 10 +``` + +## Behavior + +1. **Error Detection**: + - If `task` provided → runs build check for baseline, then fixes the described issue + - If no task and previous failures exist → re-runs failed validations from `.ralph/activity.md` + - If `--scan` → runs full validation suite (build + lint + typecheck + tests) + +2. **Skill Detection**: + - Detects installed Claude Code skills relevant to the task + - For CSS/design tasks → auto-applies design skills and adds visual verification instructions + - Searches skills.sh for complementary skills if needed + +3. **Fix Loop**: + - Agent works on fixing issues (default: 3 iterations for scan, 7 for `--design`) + - Lint checks run between iterations (fast feedback) + - Full build check runs on final iteration + - If build fails on final iteration → extends loop by 2 extra iterations + +4. **Verification**: + - Re-runs original validation commands after the loop + - Reports success only if all checks pass (not just agent completion) + +## Design Mode + +The `--design` flag enables a structured visual fix workflow specifically designed for CSS, layout, and styling issues. It runs the agent through a 5-phase process: + +### Phase 1: Visual Audit + +The agent's **first action** is to start the dev server and take screenshots at 3 viewports: +- Desktop (1440px) +- Tablet (768px) +- Mobile (375px) + +### Phase 2: Issue Identification + +The agent analyzes screenshots against the project spec and checks for issues in priority order: + +0. **CSS cascade conflicts** — Detects unlayered CSS resets (e.g., `* { margin: 0; padding: 0; }`) that silently override Tailwind v4 utilities. This is the most common cause of "classes are correct but nothing works." +1. **Page structure** — Content centering, max-width wrappers, empty gaps +2. **Layout & positioning** — Grid/flex rendering, column balance, overlaps +3. **Responsive issues** — Viewport breakage, overflow, clipping +4. **Spacing** — Vertical rhythm, abnormal gaps +5. 
**Typography & colors** — Font loading, readability, consistency + +### Phase 3: Fix Plan + +The agent creates a `DESIGN_FIX_PLAN.md` with specific issues, exact files, and CSS properties to change. + +### Phase 4: Execute & Verify + +Fixes are applied in priority order (structural first, cosmetic last). The agent re-screenshots after each structural fix to verify improvement. + +### Phase 5: Completion + +The loop requires the agent to output `DESIGN_VERIFIED` after taking final verification screenshots. The loop will **not** accept generic completion signals like "All tasks completed" — only `DESIGN_VERIFIED` after visual confirmation. + +### Why Design Mode Exists + +Without `--design`, agents often: +- Read code and see "correct" Tailwind classes, then declare victory without visual verification +- Add more CSS classes on top of cascade conflicts instead of fixing the root cause +- Complete in 1 iteration without actually verifying the visual result + +Design mode forces visual-first debugging and prevents premature exit. + +## Exit Codes + +| Code | Description | +|------|-------------| +| 0 | All issues fixed | +| 1 | Could not fix all issues automatically | + +## See Also + +- [ralph-starter run](/docs/cli/run) +- [ralph-starter skill](/docs/cli/skill) +- [Validation](/docs/advanced/validation) +- [Skills System](/docs/guides/skills-system) diff --git a/docs/docs/cli/skill.md b/docs/docs/cli/skill.md index 25f5c71..e3edbf8 100644 --- a/docs/docs/cli/skill.md +++ b/docs/docs/cli/skill.md @@ -125,17 +125,11 @@ and included in the agent's prompt context when relevant. ## Auto Skill Discovery -Auto skill discovery is opt-in. When enabled, ralph-starter -queries the skills.sh registry to find and install relevant -skills automatically. +Auto skill discovery is enabled by default. ralph-starter +queries the skills.sh API to find and install relevant +skills automatically before each run. -Enable it by setting: - -```bash -RALPH_ENABLE_SKILL_AUTO_INSTALL=1 -``` - -You can also force-disable it with: +To disable it, set: ```bash RALPH_DISABLE_SKILL_AUTO_INSTALL=1 diff --git a/docs/docs/community/changelog.md b/docs/docs/community/changelog.md index ec02e82..7e48b50 100644 --- a/docs/docs/community/changelog.md +++ b/docs/docs/community/changelog.md @@ -11,6 +11,24 @@ All notable changes to ralph-starter are documented here. 
This project follows [ --- +## [0.1.1-beta.17] - 2026-02-14 + +### Added +- **`fix --design` mode**: Structured 5-phase visual fix workflow with screenshot verification, CSS cascade conflict detection, and `DESIGN_VERIFIED` completion token +- **Smart UI defaults**: Web projects now default to Tailwind CSS + shadcn/ui + motion-primitives when no styling is specified (framework-aware: shadcn-vue for Vue, shadcn-svelte for Svelte) +- **`uiLibrary` field** in TechStack for explicit UI component library selection +- **Rich spec generation**: Specs and AGENTS.md now include Tailwind v4 setup notes, CSS cascade layer warnings, and shadcn component setup instructions + +### Fixed +- Design loop premature exit — `fix --design` now requires explicit `DESIGN_VERIFIED` token after visual confirmation (prevents 1-iteration false completions) +- Design loop stall detection — screenshot/viewport analysis no longer falsely triggers idle detection +- Default design iterations increased from 5 to 7 for more thorough visual fixes + +### Changed +- Completion instruction in agent preamble is now conditional — design mode uses task-specific completion flow instead of generic "All tasks completed" + +--- + ## [0.1.1-beta.16] - 2026-02-07 ### Added diff --git a/docs/docs/wizard/overview.md b/docs/docs/wizard/overview.md index 7406dab..ba1070e 100644 --- a/docs/docs/wizard/overview.md +++ b/docs/docs/wizard/overview.md @@ -46,10 +46,12 @@ Once you describe your idea (e.g., "a habit tracker app"), the AI: You can then customize: - **Project Type** - Web, API, CLI, Mobile, Library, Automation -- **Tech Stack** - Frontend, backend, database choices +- **Tech Stack** - Frontend, backend, database, styling, UI library choices - **Features** - Select which features to include - **Complexity** - Prototype, MVP, or Full-featured +**Smart UI defaults**: For web projects, if you don't specify a styling or UI library, ralph-starter defaults to **Tailwind CSS + shadcn/ui + motion-primitives**. The shadcn variant is framework-aware: shadcn/ui for React/Next.js, shadcn-vue for Vue, and shadcn-svelte for Svelte. These defaults produce richer specs with Tailwind v4 setup notes that prevent common CSS cascade conflicts. + ### Step 4: Execution Options Choose how to proceed: @@ -95,9 +97,11 @@ $ ralph-starter Type: Web Application Tech Stack: - Frontend: react - Backend: nodejs - Database: sqlite + Frontend: React + Backend: Node.js + Database: SQLite + Styling: Tailwind CSS + UI Library: shadcn/ui Key Features: • Transaction tracking diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 93d1de1..2f1b4f3 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -40,6 +40,7 @@ const sidebars: SidebarsConfig = { collapsed: true, items: [ 'cli/run', + 'cli/fix', 'cli/init', 'cli/plan', 'cli/config', diff --git a/docs/static/ai-index.json b/docs/static/ai-index.json index af31353..78648b4 100644 --- a/docs/static/ai-index.json +++ b/docs/static/ai-index.json @@ -5,7 +5,7 @@ "description": "Connect your tools like GitHub, Linear, and Notion. 
Fetch specs from anywhere and let AI coding agents build production-ready code automatically with autonomous loops.", "url": "https://ralphstarter.ai", "version": "1.0.0", - "generatedAt": "2026-02-09T23:59:04.120Z", + "generatedAt": "2026-02-13T21:19:43.300Z", "summary": { "purpose": "ralph-starter is an AI-powered CLI tool that fetches specs from tools like Figma, GitHub, Linear, and Notion, then runs autonomous AI coding loops to build production-ready code.", "primaryUseCase": "Developers use ralph-starter to automate coding tasks by providing natural language specs or importing requirements from external tools.", @@ -144,7 +144,7 @@ }, { "name": "Cli", - "documentCount": 13, + "documentCount": 14, "documents": [ { "title": "auth", @@ -206,6 +206,20 @@ "credentials" ] }, + { + "title": "fix", + "description": "Fix build errors, lint issues, or design problems", + "url": "https://ralphstarter.ai/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "keywords": [ + "cli", + "fix", + "command", + "build errors", + "lint", + "design" + ] + }, { "title": "init", "description": "Initialize Ralph Playbook in a project", @@ -639,8 +653,8 @@ "sitemap": "https://ralphstarter.ai/sitemap.xml" }, "stats": { - "totalDocuments": 39, + "totalDocuments": 40, "totalCategories": 8, - "lastUpdated": "2026-02-09T23:59:04.120Z" + "lastUpdated": "2026-02-13T21:19:43.300Z" } } \ No newline at end of file diff --git a/docs/static/docs-urls.txt b/docs/static/docs-urls.txt index f5d6207..58be194 100644 --- a/docs/static/docs-urls.txt +++ b/docs/static/docs-urls.txt @@ -7,6 +7,7 @@ https://ralphstarter.ai/docs/cli/auth.md https://ralphstarter.ai/docs/cli/auto.md https://ralphstarter.ai/docs/cli/check.md https://ralphstarter.ai/docs/cli/config.md +https://ralphstarter.ai/docs/cli/fix.md https://ralphstarter.ai/docs/cli/init.md https://ralphstarter.ai/docs/cli/integrations.md https://ralphstarter.ai/docs/cli/plan.md diff --git a/docs/static/docs.json b/docs/static/docs.json index 0b86d4b..c881da0 100644 --- a/docs/static/docs.json +++ b/docs/static/docs.json @@ -2,8 +2,8 @@ "name": "ralph-starter - AI-Powered Autonomous Coding from Specs to Production", "description": "Connect your tools like GitHub, Linear, and Notion. Fetch specs from anywhere and let AI coding agents build production-ready code automatically with autonomous loops.", "baseUrl": "https://ralphstarter.ai", - "generatedAt": "2026-02-09T23:59:04.119Z", - "totalDocs": 39, + "generatedAt": "2026-02-13T21:19:43.299Z", + "totalDocs": 40, "access": { "llmsTxt": "https://ralphstarter.ai/llms.txt", "llmsFullTxt": "https://ralphstarter.ai/llms-full.txt", @@ -155,6 +155,22 @@ "credentials" ] }, + { + "title": "fix", + "description": "Fix build errors, lint issues, or design problems", + "path": "/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "htmlUrl": "https://ralphstarter.ai/docs/cli/fix", + "category": "Cli", + "keywords": [ + "cli", + "fix", + "command", + "build errors", + "lint", + "design" + ] + }, { "title": "init", "description": "Initialize Ralph Playbook in a project", diff --git a/docs/static/llms-full.txt b/docs/static/llms-full.txt index 53e7f6b..c5181f4 100644 --- a/docs/static/llms-full.txt +++ b/docs/static/llms-full.txt @@ -1671,6 +1671,119 @@ These environment variables override config file values: --- +## fix + +# ralph-starter fix + +Fix build errors, lint issues, or design problems. 
+ +## Synopsis + +```bash +ralph-starter fix [task] [options] +``` + +## Description + +The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. + +When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: + +- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) +- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots + +## Arguments + +| Argument | Description | +|----------|-------------| +| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. | + +## Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--agent <agent>` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | +| `--commit` | Auto-commit the fix | false | +| `--max-iterations <n>` | Maximum fix iterations | 3 | +| `--output-dir <dir>` | Project directory | cwd | + +## Examples + +### Fix Build Errors + +```bash +# Auto-detect and fix build/lint errors +ralph-starter fix + +# Force full project scan +ralph-starter fix --scan +``` + +### Fix Design Issues + +```bash +# Fix visual/CSS problems +ralph-starter fix "fix the paddings and make the colors brighter" + +# Fix responsive layout +ralph-starter fix "make the layout responsive on mobile" + +# Fix color theme +ralph-starter fix "change the color scheme to darker tones" +``` + +### With Options + +```bash +# Auto-commit the fix +ralph-starter fix --scan --commit + +# Use a specific agent +ralph-starter fix "fix lint errors" --agent claude-code + +# Allow more iterations for complex fixes +ralph-starter fix "fix all test failures" --max-iterations 5 +``` + +## Behavior + +1. **Error Detection**: + - If `task` provided → runs build check for baseline, then fixes the described issue + - If no task and previous failures exist → re-runs failed validations from `.ralph/activity.md` + - If `--scan` → runs full validation suite (build + lint + typecheck + tests) + +2. **Skill Detection**: + - Detects installed Claude Code skills relevant to the task + - For CSS/design tasks → auto-applies design skills and adds visual verification instructions + - Searches skills.sh for complementary skills if needed + +3. **Fix Loop**: + - Agent works on fixing issues (default: 3 iterations) + - Lint checks run between iterations (fast feedback) + - Full build check runs on final iteration + - If build fails on final iteration → extends loop by 2 extra iterations + +4. **Verification**: + - Re-runs original validation commands after the loop + - Reports success only if all checks pass (not just agent completion) + +## Exit Codes + +| Code | Description | +|------|-------------| +| 0 | All issues fixed | +| 1 | Could not fix all issues automatically | + +## See Also + +- [ralph-starter run](/docs/cli/run) +- [ralph-starter skill](/docs/cli/skill) +- [Validation](/docs/advanced/validation) +- [Skills System](/docs/guides/skills-system) + +--- + ## init + # ralph-starter init @@ -2941,6 +3054,18 @@ installed skills from three locations: Detected skills are matched against the project's tech stack and included in the agent's prompt context when relevant. +## Auto Skill Discovery + +Auto skill discovery is enabled by default. 
ralph-starter +queries the skills.sh API to find and install relevant +skills automatically before each run. + +To disable it, set: + +```bash +RALPH_DISABLE_SKILL_AUTO_INSTALL=1 +``` + ## Behavior - The `add` action uses `npx add-skill` under the hood. @@ -7894,7 +8019,7 @@ $ ralph-starter ❯ Yes, I know what I want to build No, help me brainstorm ideas -? What's your idea for today? +? Which idea do you want to build? (e.g., "a habit tracker app" or "an API for managing recipes") > a personal finance tracker @@ -7918,7 +8043,7 @@ $ ralph-starter Complexity: Working MVP -? Does this look right? +? Are these the right specs? ❯ Yes, let's build it! I want to change something Start over with a different idea diff --git a/docs/static/llms.txt b/docs/static/llms.txt index 1ce9198..7c1116a 100644 --- a/docs/static/llms.txt +++ b/docs/static/llms.txt @@ -15,6 +15,7 @@ This file contains links to documentation sections following the llmstxt.org sta - [auto](https://ralphstarter.ai/docs/cli/auto): Autonomous batch task processing from GitHub and Linear - [check](https://ralphstarter.ai/docs/cli/check): Validate configuration and test LLM connection - [config](https://ralphstarter.ai/docs/cli/config): Manage source configuration and credentials +- [fix](https://ralphstarter.ai/docs/cli/fix): Fix build errors, lint issues, or design problems - [init](https://ralphstarter.ai/docs/cli/init): Initialize Ralph Playbook in a project - [integrations](https://ralphstarter.ai/docs/cli/integrations): Manage, test, and fetch data from integrations - [plan](https://ralphstarter.ai/docs/cli/plan): Create implementation plan from specs diff --git a/docs/static/sidebar.json b/docs/static/sidebar.json index 83b7ee3..0b834b0 100644 --- a/docs/static/sidebar.json +++ b/docs/static/sidebar.json @@ -1,7 +1,7 @@ { "name": "ralph-starter Documentation", "baseUrl": "https://ralphstarter.ai", - "generatedAt": "2026-02-09T23:59:04.120Z", + "generatedAt": "2026-02-13T21:19:43.300Z", "navigation": [ { "type": "category", @@ -159,6 +159,14 @@ "markdownUrl": "https://ralphstarter.ai/docs/cli/config.md", "description": "Manage source configuration and credentials" }, + { + "type": "doc", + "label": "fix", + "path": "/docs/cli/fix", + "url": "https://ralphstarter.ai/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "description": "Fix build errors, lint issues, or design problems" + }, { "type": "doc", "label": "init", @@ -419,6 +427,12 @@ "url": "https://ralphstarter.ai/docs/cli/config", "category": "Cli" }, + { + "title": "fix", + "path": "/docs/cli/fix", + "url": "https://ralphstarter.ai/docs/cli/fix", + "category": "Cli" + }, { "title": "init", "path": "/docs/cli/init", diff --git a/docs/static/sitemap.xml b/docs/static/sitemap.xml index 8bc72eb..c727c4f 100644 --- a/docs/static/sitemap.xml +++ b/docs/static/sitemap.xml @@ -1 +1 @@ 
-<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>https://ralphstarter.ai/integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/markdown-page</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/templates</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/use-cases</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/circuit-breaker</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/git-automation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/ralph-playbook</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/rate-limiting</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/validation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/auth</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/auto</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/check</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/config</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/init</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/plan</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/presets</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/run</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/setup</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/skill</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/source</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/template</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/changelog</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/contributing</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/ideas</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/faq</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/cost-tracking</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/extending-ralph-starter</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/prd-workflow</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/skills-system</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/testing-integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/workflow-presets</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/installation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/intro</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/mcp/claude-desktop</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/mcp/setup</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/figma</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/github</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/linear</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/notion</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/overview</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/wizard/idea-mode</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/wizard/overview</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/</loc><changefreq>daily</changefreq><priority>0.7</priority></url></urlset> \ No newline at end of file 
+<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>https://ralphstarter.ai/integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/markdown-page</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/templates</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/use-cases</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/circuit-breaker</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/git-automation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/ralph-playbook</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/rate-limiting</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/advanced/validation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/auth</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/auto</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/check</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/config</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/fix</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/init</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/plan</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/presets</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/run</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/setup</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/skill</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/source</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/cli/template</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/changelog</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/contributing</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/community/ideas</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/faq</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/cost-tracking</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/extending-ralph-starter</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/prd-workflow</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/skills-system</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/testing-integrations</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/guides/workflow-presets</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/installation</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/intro</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/mcp/claude-desktop</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/mcp/setup</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/figma</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/github</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/linear</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/notion</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/sources/overview</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/wizard/idea-mode</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/docs/wizard/overview</loc><changefreq>daily</changefreq><priority>0.7</priority></url><url><loc>https://ralphstarter.ai/</loc><changefreq>daily</changefreq><priority>0.7</priority></url></urlset> \ No newline at end of file diff --git a/src/automation/git.ts b/src/automation/git.ts index a2b8b37..9ee555b 100644 --- a/src/automation/git.ts +++ b/src/automation/git.ts @@ -12,6 +12,43 @@ export async function hasUncommittedChanges(cwd: string): Promise<boolean> { } } +/** + * Get the current HEAD commit hash. + * Returns empty string if not a git repo or no commits yet. + */ +export async function getHeadCommitHash(cwd: string): Promise<string> { + const git: SimpleGit = simpleGit({ baseDir: cwd }); + try { + const hash = await git.revparse(['HEAD']); + return hash.trim(); + } catch { + return ''; + } +} + +/** + * Detect whether an iteration produced any changes — either uncommitted + * working tree changes OR new commits since `startHash`. + * + * This is critical because agents like Claude Code auto-commit during their + * run. If we only check `git status`, committed work looks like "no changes" + * and trips the stall detector / skips build validation. 
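+ *
+ * Illustrative call site (a sketch; the loop-side steps named in the
+ * comments are hypothetical, not functions from this module):
+ *   const startHash = await getHeadCommitHash(cwd); // snapshot before the iteration
+ *   // ... agent iteration runs here and may auto-commit its work ...
+ *   if (!(await hasIterationChanges(cwd, startHash))) {
+ *     // only now treat the iteration as stalled
+ *   }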
+ */ +export async function hasIterationChanges(cwd: string, startHash: string): Promise<boolean> { + // First check: uncommitted changes (staged or unstaged) + if (await hasUncommittedChanges(cwd)) { + return true; + } + + // Second check: new commits since iteration start + if (!startHash) return false; + + const currentHash = await getHeadCommitHash(cwd); + if (!currentHash) return false; + + return currentHash !== startHash; +} + export async function gitCommit(cwd: string, message: string): Promise<void> { const git: SimpleGit = simpleGit({ baseDir: cwd }); diff --git a/src/cli.ts b/src/cli.ts index f5bac3a..4e99bdc 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,6 +6,7 @@ import { authCommand } from './commands/auth.js'; import { autoCommand } from './commands/auto.js'; import { checkCommand } from './commands/check.js'; import { configCommand } from './commands/config.js'; +import { fixCommand } from './commands/fix.js'; import { initCommand } from './commands/init.js'; import { integrationsCommand } from './commands/integrations.js'; import { pauseCommand } from './commands/pause.js'; @@ -82,6 +83,10 @@ program .option('--no-track-cost', 'Disable cost tracking') .option('--circuit-breaker-failures <n>', 'Max consecutive failures before stopping (default: 3)') .option('--circuit-breaker-errors <n>', 'Max same error occurrences before stopping (default: 5)') + .option( + '--validation-warmup <n>', + 'Skip validation until N tasks are completed (auto-detected for greenfield builds)' + ) .option( '--context-budget <tokens>', 'Max input tokens per iteration for smart context trimming (0 = unlimited)' ) @@ -100,6 +105,18 @@ .option('--figma-mapping <file>', 'Custom content mapping file (content mode)') .action(runCommand); +// ralph-starter fix - Fix build errors and code quality issues +program + .command('fix [task]') + .description('Fix build errors and code quality issues (optional: describe what to fix)') + .option('--scan', 'Force full project scan (build + lint + typecheck + tests)') + .option('--agent <agent>', 'Agent to use (default: auto-detect)') + .option('--commit', 'Auto-commit the fix') + .option('--max-iterations <n>', 'Max fix iterations (default: 3)') + .option('--output-dir <dir>', 'Project directory (default: cwd)') + .option('--design', 'Visual-first design fix: screenshot, analyze, plan, and fix design issues') + .action(fixCommand); + // ralph-starter init - Initialize Ralph in a project program .command('init') diff --git a/src/commands/fix.ts b/src/commands/fix.ts new file mode 100644 index 0000000..65bb0b0 --- /dev/null +++ b/src/commands/fix.ts @@ -0,0 +1,334 @@ +import { existsSync, readdirSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import chalk from 'chalk'; +import ora from 'ora'; +import { type Agent, detectAvailableAgents, detectBestAgent } from '../loop/agents.js'; +import { runLoop } from '../loop/executor.js'; +import { + detectBuildCommands, + detectValidationCommands, + formatValidationFeedback, + runAllValidations, + type ValidationCommand, +} from '../loop/validation.js'; +import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; + +interface FixOptions { + agent?: string; + commit?: boolean; + maxIterations?: string; + outputDir?: string; + scan?: boolean; + design?: boolean; +} + +/** + * Parse the last validation failure from .ralph/activity.md. + * Returns the names of commands that failed (e.g., ["npm run build"]). 
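+ *
+ * Illustrative .ralph/activity.md block this parser matches (shape inferred
+ * from the regexes below; the exact report format may vary):
+ *   ### Iteration 4
+ *   Validation Failed
+ *   - ❌ npm run build
+ *   - ❌ npm run lint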
+ */ +function parseLastFailedValidations(cwd: string): string[] { + const activityPath = join(cwd, '.ralph', 'activity.md'); + if (!existsSync(activityPath)) return []; + + const content = readFileSync(activityPath, 'utf-8'); + // Split into iteration blocks and find the last one with a validation failure + const blocks = content.split(/^### Iteration/m); + const lastFailed = blocks.reverse().find((b) => b.includes('Validation Failed')); + if (!lastFailed) return []; + + const failedNames: string[] = []; + for (const match of lastFailed.matchAll(/- ❌\s+(.+)/g)) { + failedNames.push(match[1].trim()); + } + return failedNames; +} + +export async function fixCommand(customTask: string | undefined, options: FixOptions) { + const cwd = options.outputDir || process.cwd(); + + // --- Step 1: Determine which commands to run --- + let commands: ValidationCommand[] | undefined; + let mode: 'activity' | 'scan' | 'custom' = 'scan'; + let feedback = ''; + + if (customTask) { + // Custom task provided — still run build to check for errors, but don't bail if clean + mode = 'custom'; + commands = detectBuildCommands(cwd); + } else if (!options.scan) { + const failedNames = parseLastFailedValidations(cwd); + if (failedNames.length > 0) { + mode = 'activity'; + const allCommands = detectValidationCommands(cwd); + commands = allCommands.filter((c) => failedNames.some((name) => name.includes(c.name))); + if (commands.length === 0) commands = detectBuildCommands(cwd); + } + } + + if (!commands || commands.length === 0) { + if (mode !== 'custom') mode = 'scan'; + commands = detectValidationCommands(cwd); + if (commands.length === 0) commands = detectBuildCommands(cwd); + } + + // --- Step 2: Run validations (if any commands were detected) --- + if (commands.length > 0) { + const spinner = ora( + mode === 'custom' + ? 'Checking project health...' + : `Scanning project (${mode === 'activity' ? 'from last run' : 'full scan'})...` + ).start(); + + const results = await runAllValidations(cwd, commands); + const failures = results.filter((r) => !r.success); + + if (failures.length === 0 && !customTask && !options.design) { + spinner.succeed(chalk.green('All checks passed — nothing to fix!')); + return; + } + + if (failures.length > 0) { + spinner.fail(chalk.red(`Found ${failures.length} issue(s):`)); + for (const f of failures) { + const errorText = f.error || f.output || ''; + const errorCount = (errorText.match(/error/gi) || []).length; + console.log(chalk.red(` ✗ ${f.command}${errorCount ? ` (${errorCount} errors)` : ''}`)); + } + feedback = formatValidationFeedback(results); + } else { + spinner.succeed(chalk.green('Build passing')); + } + console.log(); + } else if (!customTask && !options.design) { + console.log(chalk.yellow('No build/lint/test commands detected in this project.')); + return; + } + + // --- Step 3: Detect agent --- + let agent: Agent | null = null; + + if (options.agent) { + const agents = await detectAvailableAgents(); + const found = agents.find( + (a) => a.type === options.agent || a.name.toLowerCase() === options.agent?.toLowerCase() + ); + if (!found) { + console.log(chalk.red(`Agent not found: ${options.agent}`)); + return; + } + if (!found.available) { + console.log(chalk.red(`Agent not available: ${found.name}`)); + return; + } + agent = found; + } else { + agent = await detectBestAgent(); + } + + if (!agent) { + console.log( + chalk.red( + 'No coding agent detected. Install Claude Code, Cursor, or another supported agent.' 
+ ) + ); + return; + } + + console.log(chalk.cyan(`Using ${agent.name} to fix issues...\n`)); + + // --- Step 4: Build task and run fix loop --- + let fixTask: string; + if (customTask) { + fixTask = feedback + ? `${customTask}\n\nAlso fix any build/validation errors found during the scan.` + : customTask; + } else if (mode === 'activity') { + fixTask = + 'Fix the build/validation errors in this project. Study the error output below, identify the root cause, and implement the minimal fix. Do not refactor or make unnecessary changes.'; + } else { + fixTask = + 'Fix all project issues found by the scan below. Prioritize: build errors first, then type errors, then lint violations, then test failures. Make minimal, focused fixes.'; + } + + // Include original spec context so the agent knows what "correct" looks like + const specsDir = join(cwd, 'specs'); + const planPath = join(cwd, 'IMPLEMENTATION_PLAN.md'); + let specContext = ''; + + if (existsSync(specsDir)) { + try { + const specFiles = readdirSync(specsDir).filter((f) => f.endsWith('.md')); + for (const file of specFiles) { + const content = readFileSync(join(specsDir, file), 'utf-8'); + const truncated = + content.length > 3000 + ? `${content.slice(0, 3000)}\n\n[... spec truncated for brevity ...]` + : content; + specContext += `\n### Spec: ${file}\n${truncated}\n`; + } + } catch { + // Specs directory unreadable + } + } + + if (existsSync(planPath)) { + try { + const planContent = readFileSync(planPath, 'utf-8'); + const planSummary = + planContent.length > 2000 + ? `${planContent.slice(0, 2000)}\n\n[... plan truncated ...]` + : planContent; + specContext += `\n### Implementation Plan\n${planSummary}\n`; + } catch { + // Plan file unreadable + } + } + + if (specContext) { + fixTask = `${fixTask}\n\n## Original Design Specification\n\nIMPORTANT: Use the following specification as the source of truth for what the design should look like. Match the described colors, spacing, layout, and styling exactly.\n${specContext}`; + } + + // For design/visual tasks, add instructions to visually verify with screenshots + const DESIGN_KEYWORDS = [ + 'css', + 'style', + 'styling', + 'padding', + 'margin', + 'spacing', + 'color', + 'colour', + 'background', + 'theme', + 'font', + 'typography', + 'border', + 'shadow', + 'layout', + 'responsive', + 'animation', + 'design', + 'ui', + 'ux', + 'brighter', + 'darker', + 'visual', + ]; + const isDesignTask = + options.design || + (customTask && DESIGN_KEYWORDS.some((kw) => customTask.toLowerCase().includes(kw))); + + // --design flag: structured visual-first fix flow + if (options.design) { + fixTask = `You are fixing design and visual issues in this project. Ignore IMPLEMENTATION_PLAN.md — this is a visual fix pass, not a feature build. + +IMPORTANT: Your VERY FIRST action must be to start the dev server and take screenshots. Do NOT read files or explore the codebase first — start visually. + +## Phase 1: Visual Audit (DO THIS FIRST) +1. Start the dev server (e.g. \`npm run dev\` or \`npx vite\`) — this OVERRIDES the "no dev server" rule +2. Take full-page screenshots at 3 viewports: desktop (1440px), tablet (768px), mobile (375px) +3. Analyze each screenshot carefully against the spec below + +## Phase 2: Issue Identification (be SPECIFIC, not generic) +Look at the screenshots and identify CONCRETE issues you can actually see. Do NOT list generic improvements — only list problems visible in the screenshots. + +Check in this priority order: +0. 
**CSS cascade conflicts** — If spacing/margin/padding from Tailwind classes aren't working but colors/fonts/grid work fine, check the main CSS file (e.g. index.css, globals.css) for unlayered rules like \`* { margin: 0; padding: 0; }\` that override Tailwind's @layer-based utilities. Remove any such rules — Tailwind v4's preflight already provides proper resets. +1. **Page structure** — Is content centered? Are sections contained in a max-width wrapper? Is anything stuck to the left/right edge when it shouldn't be? Are there huge empty gaps between sections? +2. **Layout & positioning** — Are grid/flex layouts rendering correctly? Are columns balanced? Is the hero section properly structured? Are elements overlapping or misaligned? +3. **Responsive issues** — Does the layout break at any viewport? Do elements overflow or get clipped? +4. **Spacing** — Is vertical rhythm consistent between sections? Are there abnormally large or small gaps? +5. **Typography & colors** — Are fonts loading? Is text readable against backgrounds? Are colors consistent? + +IMPORTANT: Focus on what looks BROKEN, not what could be "improved." A centered layout with wrong padding is lower priority than content pinned to the left edge. + +## Phase 3: Fix Plan +Create a DESIGN_FIX_PLAN.md. For each issue: +- Describe EXACTLY what's wrong (e.g., "Hero content is not centered — text hugs the left edge with no container") +- Specify the exact file and CSS property to change +- Keep fixes minimal — fix the actual problem, don't redesign the entire component + +Prioritize: page structure > layout positioning > responsive > spacing > cosmetic. + +## Phase 4: Execute & Verify +1. Fix structural issues FIRST (containers, centering, grid layout), then work down to cosmetic +2. After fixing each structural issue, re-screenshot to verify the layout improved +3. Final verification: screenshot all 3 viewports and confirm the page looks properly structured +4. CRITICAL: After confirming fixes look correct in final screenshots, output DESIGN_VERIFIED on its own line. Do NOT output this until you have taken verification screenshots and confirmed the design is correct. + +## Phase 5: Cleanup +1. Stop the dev server (kill the process) when done — do NOT leave it running +2. If you have NOT already output DESIGN_VERIFIED, do it now after visual confirmation + +IMPORTANT: The loop will NOT accept completion without the exact token DESIGN_VERIFIED. Do NOT say "All tasks completed" — it will be ignored. + +${customTask ? `\nUser notes: ${customTask}\n` : ''}${specContext ? `\n## Original Design Specification\n${specContext}` : ''}${feedback ? `\n\n## Build Errors (also fix these)\n${feedback}` : ''}`; + } else if (isDesignTask) { + fixTask += `\n\nVisual verification (IMPORTANT — OVERRIDES the "no dev server" rule): +This is a visual/design task. After making your CSS and styling changes, you MUST visually verify the result: +1. Start a local dev server (e.g. npm run dev) — this is the ONE exception to the "never start a dev server" rule +2. Take browser screenshots at desktop (1440px) and mobile (375px) viewports +3. Compare screenshots against the spec above — check colors, spacing, layout, and typography match +4. Fix any visual issues you spot (spacing, colors, alignment, contrast) +5. 
CRITICAL: Stop the dev server (kill the process) when done — do NOT leave it running`; + } + + // Install relevant skills so the agent has design/quality context + // Use the user's custom task (not the full generated prompt) to avoid keyword-spam + // that triggers excessive skill searches from the design prompt boilerplate + await autoInstallSkillsFromTask(customTask || (options.design ? 'design fix' : 'fix'), cwd); + + const defaultIter = options.design ? 7 : isDesignTask ? 4 : 3; + const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : defaultIter; + + const result = await runLoop({ + task: fixTask, + cwd, + agent, + maxIterations: maxIter, + auto: true, + commit: options.commit, + initialValidationFeedback: feedback || undefined, + trackProgress: true, + checkFileCompletion: false, + validate: mode === 'scan', + maxSkills: options.design ? 4 : undefined, + skipPlanInstructions: options.design, + fixMode: options.design ? 'design' : customTask ? 'custom' : 'scan', + // Design mode: require explicit DESIGN_VERIFIED token after visual verification + ...(options.design && { + completionPromise: 'DESIGN_VERIFIED', + requireExitSignal: true, + }), + }); + + // --- Step 5: Verify fix by re-running validations --- + // The loop's exit reason may be max_iterations even if the build now passes. + // For the fix command, success = "do the checks pass now?", not "did the agent say done?" + let fixed = result.success; + + if (!fixed && commands.length > 0) { + const verifySpinner = ora('Verifying fix...').start(); + const verifyResults = await runAllValidations(cwd, commands); + const stillFailing = verifyResults.filter((r) => !r.success); + + if (stillFailing.length === 0) { + verifySpinner.succeed(chalk.green('All checks passing now!')); + fixed = true; + } else { + verifySpinner.fail(chalk.red(`${stillFailing.length} issue(s) still failing`)); + for (const f of stillFailing) { + console.log(chalk.red(` ✗ ${f.command}`)); + } + } + } + + // --- Step 6: Report --- + console.log(); + if (fixed) { + console.log(chalk.green('All issues fixed!')); + } else { + console.log(chalk.red('Could not fix all issues automatically.')); + console.log(chalk.dim(' Run again or fix remaining issues manually.')); + } +} diff --git a/src/commands/init.ts b/src/commands/init.ts index ec3754c..4e202dd 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -6,6 +6,11 @@ import ora from 'ora'; import YAML from 'yaml'; import { initGitRepo, isGitRepo } from '../automation/git.js'; import { type Agent, detectAvailableAgents, printAgentStatus } from '../loop/agents.js'; +import { + detectPackageManager, + formatRunCommand, + type PackageManager, +} from '../utils/package-manager.js'; interface InitOptions { name?: string; @@ -18,6 +23,7 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'unknown'; export interface ProjectInfo { type: ProjectType; name: string; + packageManager?: PackageManager; testCmd?: string; buildCmd?: string; lintCmd?: string; @@ -29,12 +35,14 @@ export function detectProject(cwd: string): ProjectInfo { try { const pkg = JSON.parse(readFileSync(join(cwd, 'package.json'), 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); return { type: 'nodejs', name: pkg.name || 'project', - testCmd: scripts.test ? 'npm test' : undefined, - buildCmd: scripts.build ? 'npm run build' : undefined, - lintCmd: scripts.lint ? 'npm run lint' : undefined, + packageManager: pm, + testCmd: scripts.test ? 
formatRunCommand(pm, 'test') : undefined, + buildCmd: scripts.build ? formatRunCommand(pm, 'build') : undefined, + lintCmd: scripts.lint ? formatRunCommand(pm, 'lint') : undefined, }; } catch { return { type: 'nodejs', name: 'project' }; @@ -150,7 +158,7 @@ ${validationCmds.length > 0 ? validationCmds.join('\n') : '# Add your test/build ## Build Instructions -${project.type === 'nodejs' ? '1. Run `npm install` to install dependencies\n2. Run `npm run build` to build (if applicable)\n3. Run `npm test` to verify' : ''} +${project.type === 'nodejs' ? `1. Run \`${project.packageManager || 'npm'} install\` to install dependencies\n2. Run \`${project.buildCmd || `${project.packageManager || 'npm'} run build`}\` to build (if applicable)\n3. Run \`${project.testCmd || `${project.packageManager || 'npm'} test`}\` to verify` : ''} ${project.type === 'python' ? '1. Create virtual environment: `python -m venv venv`\n2. Install dependencies: `pip install -e .`\n3. Run tests: `pytest`' : ''} ${project.type === 'rust' ? '1. Run `cargo build` to compile\n2. Run `cargo test` to verify' : ''} ${project.type === 'go' ? '1. Run `go mod tidy` to sync dependencies\n2. Run `go build ./...` to compile\n3. Run `go test ./...` to verify' : ''} diff --git a/src/commands/resume.ts b/src/commands/resume.ts index efc8446..fb0f09b 100644 --- a/src/commands/resume.ts +++ b/src/commands/resume.ts @@ -127,6 +127,7 @@ export async function resumeCommand(options: ResumeCommandOptions = {}): Promise<void> checkFileCompletion: session.options.checkFileCompletion, trackCost: session.options.trackCost, model: session.options.model, + initialValidationFeedback: session.lastValidationFeedback, }; // Run the loop diff --git a/src/commands/run.ts b/src/commands/run.ts index 58ed02e..e836f0e 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -20,6 +20,8 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; import { getSourceDefaults } from '../sources/config.js'; import { fetchFromSource } from '../sources/index.js'; +import { detectPackageManager, formatRunCommand, getRunCommand } from '../utils/package-manager.js'; +import { showWelcome } from '../wizard/ui.js'; /** Default fallback repo for GitHub issues when no project is specified */ const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas'; @@ -42,19 +44,14 @@ function detectRunCommand( try { const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); // Priority order for dev commands - if (scripts.dev) { - return { command: 'npm', args: ['run', 'dev'], description: 'npm run dev' }; - } - if (scripts.start) { - return { command: 'npm', args: ['run', 'start'], description: 'npm run start' }; - } - if (scripts.serve) { - return { command: 'npm', args: ['run', 'serve'], description: 'npm run serve' }; - } - if (scripts.preview) { - return { command: 'npm', args: ['run', 'preview'], description: 'npm run preview' }; + for (const script of ['dev', 'start', 'serve', 'preview']) { + if (scripts[script]) { + const cmd = getRunCommand(pm, script); + return { ...cmd, description: formatRunCommand(pm, script) }; + } } } catch { // Ignore parse errors @@ -223,6 +220,7 @@ export interface RunCommandOptions { circuitBreakerFailures?: number; circuitBreakerErrors?: number; contextBudget?: number; + validationWarmup?: number; // Figma options figmaMode?: 'spec' | 'tokens' | 'components' | 'assets' | 
'content'; figmaFramework?: 'react' | 'vue' | 'svelte' | 'astro' | 'nextjs' | 'nuxt' | 'html'; @@ -250,10 +248,7 @@ export async function runCommand( } } - console.log(); - console.log(chalk.cyan.bold('ralph-starter')); - console.log(chalk.dim('Ralph Wiggum made easy')); - console.log(); + showWelcome(); // Check for git repo if (options.commit || options.push || options.pr) { @@ -337,16 +332,39 @@ export async function runCommand( const isIntegrationSource = integrationSources.includes(options.from?.toLowerCase() || ''); if (isIntegrationSource && !options.auto && !options.outputDir) { + // Detect existing project markers to choose smart default ordering + const projectMarkers = [ + 'package.json', + '.git', + 'Cargo.toml', + 'go.mod', + 'pyproject.toml', + 'requirements.txt', + 'Gemfile', + 'pom.xml', + 'build.gradle', + ]; + const hasProjectMarkers = projectMarkers.some((f) => existsSync(join(cwd, f))); + + // If existing project detected, default to "Current directory" first + const choices = hasProjectMarkers + ? [ + { name: `Current directory (${cwd})`, value: 'current' }, + { name: 'Create new project folder', value: 'new' }, + { name: 'Enter custom path', value: 'custom' }, + ] + : [ + { name: 'Create new project folder', value: 'new' }, + { name: `Current directory (${cwd})`, value: 'current' }, + { name: 'Enter custom path', value: 'custom' }, + ]; + const { projectLocation } = await inquirer.prompt([ { - type: 'list', + type: 'select', name: 'projectLocation', message: 'Where do you want to run this task?', - choices: [ - { name: `Current directory (${cwd})`, value: 'current' }, - { name: 'Create new project folder', value: 'new' }, - { name: 'Enter custom path', value: 'custom' }, - ], + choices, }, ]); @@ -455,7 +473,7 @@ export async function runCommand( writeFileSync(implementationPlanPath, extractedPlan); console.log(chalk.cyan('Created IMPLEMENTATION_PLAN.md from spec')); - finalTask = `Build the following project based on this specification: + finalTask = `Study the following specification carefully: ${sourceSpec} @@ -463,13 +481,33 @@ ${sourceSpec} An IMPLEMENTATION_PLAN.md file has been created with tasks extracted from this spec. As you complete each task, mark it done by changing [ ] to [x] in IMPLEMENTATION_PLAN.md. -Focus on one task at a time.`; +Focus on ONE task at a time. Don't assume functionality is not already implemented — search the codebase first. +Implement completely — no placeholders or stubs.`; } else { - finalTask = `Build the following project based on this specification: + finalTask = `Study the following specification carefully: ${sourceSpec} -Analyze the specification and implement all required features. Create a proper project structure with all necessary files.`; +## Getting Started + +IMPORTANT: Before writing any code, you MUST first: +1. Study the specification above thoroughly +2. Search the codebase — don't assume functionality is not already implemented +3. Create an IMPLEMENTATION_PLAN.md file with tasks broken down as: + +### Task 1: [name] +- [ ] Subtask a +- [ ] Subtask b + +### Task 2: [name] +- [ ] Subtask a + +Break the spec into 3-8 logical tasks, sorted by priority. + +4. Then start working on Task 1 only. + +As you complete each subtask, mark it done by changing [ ] to [x] in IMPLEMENTATION_PLAN.md. +Focus on ONE task at a time. Implement completely — no placeholders or stubs.`; } console.log(chalk.cyan('Using fetched specification as task')); } @@ -545,7 +583,7 @@ Focus on one task at a time. 
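The checkbox protocol in the prompts above is what the loop later reads back to measure progress (via `parsePlanTasks` in `src/loop/task-counter.ts`, whose internals are not part of this diff). A minimal sketch of the counting such a plan file supports, with `countPlanProgress` as a hypothetical helper:

```ts
import { readFileSync } from 'node:fs';

// Hypothetical sketch: count "- [ ]" vs "- [x]" subtasks in IMPLEMENTATION_PLAN.md.
function countPlanProgress(planPath: string): { total: number; completed: number } {
  const text = readFileSync(planPath, 'utf-8');
  const boxes = text.match(/^\s*- \[([ x])\]/gim) ?? [];
  const completed = boxes.filter((b) => b.toLowerCase().includes('[x]')).length;
  return { total: boxes.length, completed };
}
```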
After completing a task, update IMPLEMENTATION_PLAN return; } - // Auto-install relevant skills from skills.sh (if available) + // Auto-install relevant skills from skills.sh (enabled by default) await autoInstallSkillsFromTask(finalTask, cwd); // Apply preset if specified @@ -563,7 +601,11 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN } // Calculate smart iterations based on tasks (always, unless explicitly overridden) - const { iterations: smartIterations, taskCount, reason } = calculateOptimalIterations(cwd); + const { + iterations: smartIterations, + taskCount, + reason, + } = calculateOptimalIterations(cwd, finalTask); if (!options.maxIterations && !preset?.maxIterations) { if (taskCount.total > 0) { console.log( @@ -573,6 +615,16 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN console.log(chalk.dim(`Max iterations: ${smartIterations} (${reason})`)); } + // Auto-detect greenfield builds: skip validation until enough tasks are done + const isGreenfield = taskCount.total > 0 && taskCount.completed === 0; + const autoWarmup = isGreenfield ? Math.max(2, Math.floor(taskCount.total * 0.5)) : 0; + const validationWarmup = options.validationWarmup ? Number(options.validationWarmup) : autoWarmup; + if (validationWarmup > 0 && options.validate) { + console.log( + chalk.dim(`Validation warm-up: skipping until ${validationWarmup} tasks completed`) + ); + } + // Apply preset values with CLI overrides const loopOptions: LoopOptions = { task: preset?.promptPrefix ? `${preset.promptPrefix}\n\n${finalTask}` : finalTask, @@ -587,6 +639,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN prIssueRef: sourceIssueRef, prLabels: options.auto ? ['AUTO'] : undefined, validate: options.validate ?? preset?.validate, + validationWarmup, sourceType: options.from?.toLowerCase(), // New options completionPromise: options.completionPromise ?? 
preset?.completionPromise, diff --git a/src/commands/skill.ts b/src/commands/skill.ts index ecfdf58..c2f3453 100644 --- a/src/commands/skill.ts +++ b/src/commands/skill.ts @@ -9,7 +9,7 @@ interface SkillOptions { global?: boolean; } -interface SkillEntry { +export interface SkillEntry { name: string; description: string; category: string; @@ -17,7 +17,7 @@ interface SkillEntry { } // Popular skills registry (curated list) -const POPULAR_SKILLS: SkillEntry[] = [ +export const POPULAR_SKILLS: SkillEntry[] = [ // Agents { name: 'vercel-labs/agent-skills', diff --git a/src/loop/__tests__/circuit-breaker.test.ts b/src/loop/__tests__/circuit-breaker.test.ts index fe149b9..3a299da 100644 --- a/src/loop/__tests__/circuit-breaker.test.ts +++ b/src/loop/__tests__/circuit-breaker.test.ts @@ -181,18 +181,64 @@ describe('CircuitBreaker', () => { }); describe('error normalization', () => { - it('should treat similar errors with different numbers as the same', () => { + it('should treat errors with different file:line:col locations as the same', () => { const customBreaker = new CircuitBreaker({ maxConsecutiveFailures: 100, maxSameErrorCount: 3, }); - // These should hash to similar values due to number normalization - customBreaker.recordFailure('Error at line 42'); + // Same error at different file locations should hash identically + customBreaker.recordFailure('Error in src/index.ts:42:5'); customBreaker.recordSuccess(); - customBreaker.recordFailure('Error at line 99'); + customBreaker.recordFailure('Error in src/index.ts:99:12'); customBreaker.recordSuccess(); - expect(customBreaker.recordFailure('Error at line 123')).toBe(true); + expect(customBreaker.recordFailure('Error in src/index.ts:123:3')).toBe(true); + }); + + it('should treat semantically different errors as distinct', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + // Different error messages should NOT hash identically + customBreaker.recordFailure('port 8000 already in use'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('file not found: config.json'); + customBreaker.recordSuccess(); + // Third unique error — should NOT trip (only 1 of each) + expect(customBreaker.recordFailure('permission denied: /etc/shadow')).toBe(false); + }); + + it('should normalize stack traces', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + customBreaker.recordFailure('TypeError: cannot read property at Object.run (/src/a.ts:10:5)'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('TypeError: cannot read property at Object.run (/src/b.ts:20:3)'); + customBreaker.recordSuccess(); + expect( + customBreaker.recordFailure( + 'TypeError: cannot read property at Object.run (/src/c.ts:30:1)' + ) + ).toBe(true); + }); + + it('should normalize timestamps correctly (before :line:col pattern)', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + // Same error with different timestamps should hash identically + customBreaker.recordFailure('Error at 2026-02-13T14:07:39 in module'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('Error at 2026-02-13T15:22:01 in module'); + customBreaker.recordSuccess(); + expect(customBreaker.recordFailure('Error at 2026-02-14T09:00:00 in module')).toBe(true); }); }); }); diff --git a/src/loop/__tests__/cost-tracker.test.ts b/src/loop/__tests__/cost-tracker.test.ts index b955772..c946d55 
100644 --- a/src/loop/__tests__/cost-tracker.test.ts +++ b/src/loop/__tests__/cost-tracker.test.ts @@ -233,6 +233,36 @@ describe('cost-tracker', () => { }); }); + describe('isOverBudget', () => { + it('should return null when no maxCost is set', () => { + tracker.recordIteration('input', 'output'); + expect(tracker.isOverBudget()).toBeNull(); + }); + + it('should return null when under budget', () => { + const budgetTracker = new CostTracker({ + model: 'claude-3-sonnet', + maxCost: 100, // $100 budget + }); + budgetTracker.recordIteration('input', 'output'); + expect(budgetTracker.isOverBudget()).toBeNull(); + }); + + it('should return budget info when over budget', () => { + const budgetTracker = new CostTracker({ + model: 'claude-3-sonnet', + maxCost: 0.0001, // Extremely low budget + }); + // Record enough iterations to exceed tiny budget + budgetTracker.recordIteration('a'.repeat(10000), 'b'.repeat(10000)); + + const result = budgetTracker.isOverBudget(); + expect(result).not.toBeNull(); + expect(result?.maxCost).toBe(0.0001); + expect(result?.currentCost).toBeGreaterThan(0); + }); + }); + describe('model pricing', () => { it('should use default pricing for unknown models', () => { const unknownTracker = new CostTracker({ model: 'unknown-model' }); diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts index f46a90c..5adb3d3 100644 --- a/src/loop/__tests__/validation.test.ts +++ b/src/loop/__tests__/validation.test.ts @@ -1,9 +1,11 @@ import { existsSync, readFileSync } from 'node:fs'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import { + detectBuildCommands, detectValidationCommands, formatValidationFeedback, runAllValidations, + runBuildValidation, runValidation, type ValidationCommand, type ValidationResult, @@ -89,7 +91,7 @@ describe('validation', () => { expect(commands.find((c) => c.name === 'test')).toEqual({ name: 'test', command: 'npm', - args: ['run', 'test'], + args: ['test'], }); }); @@ -228,7 +230,7 @@ describe('validation', () => { expect(results.every((r) => r.success)).toBe(true); }); - it('should stop on first failure', async () => { + it('should run all commands even when some fail', async () => { mockExeca .mockResolvedValueOnce({ exitCode: 0, stdout: 'Passed', stderr: '' } as any) .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: 'Failed' } as any) @@ -242,9 +244,10 @@ describe('validation', () => { const results = await runAllValidations('/test/dir', commands); - expect(results).toHaveLength(2); // Stopped after lint failed + expect(results).toHaveLength(3); // All commands run expect(results[0].success).toBe(true); expect(results[1].success).toBe(false); + expect(results[2].success).toBe(true); }); it('should handle empty command list', async () => { @@ -307,4 +310,136 @@ describe('validation', () => { expect(feedback).toContain('Output error'); }); }); + + describe('detectBuildCommands', () => { + it('should return empty array when no config files exist', () => { + mockExistsSync.mockReturnValue(false); + const commands = detectBuildCommands('/test/dir'); + expect(commands).toHaveLength(0); + }); + + it('should detect build command from AGENTS.md', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('AGENTS.md')); + mockReadFileSync.mockReturnValue('- **Build**: `pnpm run build`'); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(1); + expect(commands[0].name).toBe('build'); + expect(commands[0].command).toBe('pnpm'); + 
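These assertions pin down the contract: an AGENTS.md line such as "- **Build**: `pnpm run build`" becomes a structured command. One plausible shape for that parsing, sketched here as an assumption (the real `detectBuildCommands` body is not shown in this diff):

```ts
// Hypothetical sketch of the AGENTS.md line parsing the surrounding tests imply.
function parseAgentsMdCommand(line: string, label: 'Build' | 'Typecheck') {
  const match = line.match(new RegExp(`\\*\\*${label}\\*\\*:\\s*\`([^\`]+)\``));
  if (!match) return undefined;
  const [command, ...args] = match[1].trim().split(/\s+/);
  return { name: label.toLowerCase(), command, args };
}

// parseAgentsMdCommand('- **Build**: `pnpm run build`', 'Build')
// -> { name: 'build', command: 'pnpm', args: ['run', 'build'] }
```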
expect(commands[0].args).toEqual(['run', 'build']); + }); + + it('should detect both build and typecheck from AGENTS.md', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('AGENTS.md')); + mockReadFileSync.mockReturnValue( + '- **Build**: `npm run build`\n- **Typecheck**: `npm run typecheck`' + ); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(2); + expect(commands.map((c) => c.name)).toEqual(['build', 'typecheck']); + }); + + it('should NOT include test or lint commands from package.json', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('package.json')); + mockReadFileSync.mockReturnValue( + JSON.stringify({ + scripts: { test: 'vitest', lint: 'eslint .', build: 'tsc', typecheck: 'tsc --noEmit' }, + }) + ); + + const commands = detectBuildCommands('/test/dir'); + const names = commands.map((c) => c.name); + + expect(names).not.toContain('test'); + expect(names).not.toContain('lint'); + expect(names).toContain('build'); + expect(names).toContain('typecheck'); + }); + + it('should fall back to npx tsc --noEmit for TypeScript projects without build script', () => { + mockExistsSync.mockImplementation((path: any) => { + if (path.toString().includes('tsconfig.json')) return true; + if (path.toString().includes('package.json')) return true; + return false; + }); + mockReadFileSync.mockReturnValue(JSON.stringify({ scripts: {} })); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(1); + expect(commands[0]).toEqual({ name: 'typecheck', command: 'npx', args: ['tsc', '--noEmit'] }); + }); + + it('should NOT use tsc fallback when build script exists', () => { + mockExistsSync.mockImplementation((path: any) => { + if (path.toString().includes('tsconfig.json')) return true; + if (path.toString().includes('package.json')) return true; + return false; + }); + mockReadFileSync.mockReturnValue(JSON.stringify({ scripts: { build: 'next build' } })); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(1); + expect(commands[0].name).toBe('build'); + }); + }); + + describe('runBuildValidation', () => { + beforeEach(() => { + mockExeca.mockReset(); // Clear persistent mockResolvedValue from runAllValidations tests + }); + + it('should use 2-minute timeout', async () => { + mockExeca.mockResolvedValueOnce({ + exitCode: 0, + stdout: 'Built successfully', + stderr: '', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + await runBuildValidation('/test/dir', command); + + expect(mockExeca).toHaveBeenCalledWith('npm', ['run', 'build'], { + cwd: '/test/dir', + timeout: 120000, + reject: false, + }); + }); + + it('should return success on exit code 0', async () => { + mockExeca.mockResolvedValueOnce({ + exitCode: 0, + stdout: 'Built successfully', + stderr: '', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + const result = await runBuildValidation('/test/dir', command); + + expect(result.success).toBe(true); + }); + + it('should return failure with error on non-zero exit', async () => { + // Use same pattern as runValidation tests (which pass) + mockExeca.mockResolvedValueOnce({ + exitCode: 1, + stdout: 'Build output', + stderr: 'Cannot find module Testimonials', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + + // Verify mock is set up + 
expect(mockExeca).toBeDefined(); + + const result = await runBuildValidation('/test/dir', command); + + expect(result.success).toBe(false); + expect(result.error).toContain('Cannot find module'); + }); + }); }); diff --git a/src/loop/agents.ts index e772cce..2662707 100644 --- a/src/loop/agents.ts +++ b/src/loop/agents.ts @@ -20,6 +20,10 @@ export interface AgentRunOptions { streamOutput?: boolean; /** Callback for each line of output */ onOutput?: (line: string) => void; + /** Agent timeout in milliseconds (default: 300000 = 5 min) */ + timeoutMs?: number; + /** Maximum output size in bytes before truncating (default: 50MB) */ + maxOutputBytes?: number; } const AGENTS: Record<AgentType, { name: string; command: string }> = { @@ -63,21 +67,19 @@ export async function checkAgentAvailable(type: AgentType): Promise<boolean> { } export async function detectAvailableAgents(): Promise<Agent[]> { - const agents: Agent[] = []; + const entries = Object.entries(AGENTS).filter(([type]) => type !== 'unknown'); - for (const [type, config] of Object.entries(AGENTS)) { - if (type === 'unknown') continue; - - const available = await checkAgentAvailable(type as AgentType); - agents.push({ + // Check all agents in parallel — each spawns an independent subprocess + const results = await Promise.all( + entries.map(async ([type, config]) => ({ type: type as AgentType, name: config.name, command: config.command, - available, - }); - } + available: await checkAgentAvailable(type as AgentType), + })) + ); - return agents; + return results; } export async function detectBestAgent(): Promise<Agent | null> { @@ -161,27 +163,33 @@ export async function runAgent( }); let output = ''; + let outputBytes = 0; let stdoutBuffer = ''; + const maxOutputBytes = options.maxOutputBytes || 50 * 1024 * 1024; // Default 50MB - // Track data timing for debugging and silence warnings + // Track data timing for debugging and silence notifications let lastDataTime = Date.now(); let silenceWarningShown = false; + let extendedSilenceShown = false; - // Warn if no data received for 30 seconds + // Notify if no data received for 30+ seconds (calm, non-alarming) const silenceChecker = setInterval(() => { const silentMs = Date.now() - lastDataTime; - if (silentMs > 30000 && !silenceWarningShown) { + if (silentMs > 60000 && !extendedSilenceShown) { + extendedSilenceShown = true; + console.log(chalk.dim(' Still working... Use RALPH_DEBUG=1 for verbose output.')); + } else if (silentMs > 30000 && !silenceWarningShown) { silenceWarningShown = true; - console.warn('\n[WARNING] No output from agent for 30+ seconds. Claude may be:'); - console.warn(' - Processing a complex task'); - console.warn(' - Stuck/rate limited'); - console.warn(' - Waiting for something'); - console.warn('Use RALPH_DEBUG=1 for detailed output\n'); + console.log( + chalk.dim( + '\n Agent is thinking... (no output for 30s, this is normal for complex tasks)' + ) + ); } }, 5000); - // Timeout: 5 minutes for actual work - const timeoutMs = 300000; + // Configurable timeout (default: 5 minutes) + const timeoutMs = options.timeoutMs || 300000; const timeout = setTimeout(() => { clearInterval(silenceChecker); if (process.env.RALPH_DEBUG) { @@ -195,6 +203,21 @@ export async function runAgent( // Process stdout line-by-line for real-time updates proc.stdout?.on('data', (data: Buffer) => { const chunk = data.toString(); + outputBytes += data.byteLength; + + // Guard against unbounded memory growth — keep last portion if over limit. + // Repeatable: no flag gate, so output stays bounded even with continuous streaming.
+ if (outputBytes > maxOutputBytes) { + const keepBytes = Math.floor(maxOutputBytes * 0.8); + output = output.slice(-keepBytes); + outputBytes = Buffer.byteLength(output); // Reset counter to actual buffer size + if (process.env.RALPH_DEBUG) { + console.error( + `[DEBUG] Output exceeded ${maxOutputBytes} bytes, truncated to ~${outputBytes}` + ); + } + } + output += chunk; stdoutBuffer += chunk; lastDataTime = Date.now(); @@ -225,6 +248,7 @@ export async function runAgent( proc.stderr?.on('data', (data: Buffer) => { const chunk = data.toString(); + outputBytes += data.byteLength; // Include stderr in byte accounting output += chunk; // Debug: log stderr output if (process.env.RALPH_DEBUG) { diff --git a/src/loop/circuit-breaker.ts b/src/loop/circuit-breaker.ts index 6734823..26e1330 100644 --- a/src/loop/circuit-breaker.ts +++ b/src/loop/circuit-breaker.ts @@ -35,17 +35,21 @@ export class CircuitBreaker { } /** - * Hash an error message to track similar errors + * Hash an error message to track similar errors. + * Normalizes variable parts (line numbers, timestamps, hex, stack traces) + * while preserving semantically meaningful content like error messages. */ private hashError(error: string): string { - // Normalize the error by removing variable parts like line numbers, timestamps + // Order matters: timestamps must be normalized before :line:col, otherwise + // "14:07:39" in a timestamp matches :\d+:\d+ and gets mangled first. const normalized = error - .replace(/\d+/g, 'N') // Replace numbers - .replace(/0x[a-fA-F0-9]+/g, 'HEX') // Replace hex values + .replace(/0x[a-fA-F0-9]+/g, 'HEX') // Replace hex addresses .replace(/at\s+\S+\s+\(\S+:\d+:\d+\)/g, 'STACK') // Replace stack traces + .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, 'TIMESTAMP') // Replace timestamps (before :line:col) + .replace(/:\d+:\d+/g, ':N:N') // Replace file:line:col locations .toLowerCase() .trim() - .slice(0, 500); // Limit length + .slice(0, 500); return crypto.createHash('md5').update(normalized).digest('hex').slice(0, 8); } diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 2504d0f..30c0627 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -7,6 +7,8 @@ * - Iterations 4+: Current task only + error summary */ +import { existsSync, readdirSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; import { estimateTokens } from './cost-tracker.js'; import type { PlanTask, TaskCount } from './task-counter.js'; @@ -27,6 +29,12 @@ export interface ContextBuildOptions { validationFeedback?: string; /** Maximum input tokens budget (0 = unlimited) */ maxInputTokens?: number; + /** Abbreviated spec summary for later iterations (avoids agent re-reading specs/) */ + specSummary?: string; + /** Skip IMPLEMENTATION_PLAN.md instructions in preamble (used by fix --design) */ + skipPlanInstructions?: boolean; + /** Iteration log content from .ralph/iteration-log.md (previous iteration summaries) */ + iterationLog?: string; } export interface BuiltContext { @@ -63,9 +71,29 @@ export function compressValidationFeedback(feedback: string, maxChars: number = const lines = stripped.split('\n'); const compressed: string[] = ['## Validation Failed\n']; let currentLength = compressed[0].length; + let sectionCount = 0; + let totalSections = 0; + // Count total ### sections for the omission summary for (const line of lines) { - // Always include headers (### command name) + if (line.startsWith('### ')) totalSections++; + } + + for (const line of lines) { + // Track 
section headers (### command name) + if (line.startsWith('### ')) { + // If we already have one complete section and are over budget, stop + if (sectionCount >= 1 && currentLength + line.length + 1 > maxChars - 100) { + const remaining = totalSections - sectionCount; + if (remaining > 0) { + compressed.push(`\n[${remaining} more failing section(s) omitted]`); + } + break; + } + sectionCount++; + } + + // Always include ## and ### headers if (line.startsWith('### ') || line.startsWith('## ')) { compressed.push(line); currentLength += line.length + 1; @@ -83,6 +111,43 @@ export function compressValidationFeedback(feedback: string, maxChars: number = return compressed.join('\n'); } +/** + * Build an abbreviated spec summary from the specs/ directory. + * Gives later iterations a quick design reference without requiring + * the agent to re-read spec files via tool calls. + */ +export function buildSpecSummary(cwd: string, maxChars: number = 1500): string | undefined { + const specsDir = join(cwd, 'specs'); + if (!existsSync(specsDir)) return undefined; + + try { + const specFiles = readdirSync(specsDir).filter((f) => f.endsWith('.md')); + if (specFiles.length === 0) return undefined; + + const parts: string[] = []; + let totalLength = 0; + + for (const file of specFiles) { + const content = readFileSync(join(specsDir, file), 'utf-8'); + const available = maxChars - totalLength; + if (available <= 100) { + parts.push(`\n[${specFiles.length - parts.length} more spec file(s) omitted]`); + break; + } + const truncated = + content.length > available + ? `${content.slice(0, available)}\n[... truncated ...]` + : content; + parts.push(truncated); + totalLength += truncated.length; + } + + return parts.join('\n---\n'); + } catch { + return undefined; + } +} + /** * Build a trimmed implementation plan context showing only the current task * with a summary of completed and pending tasks. @@ -136,27 +201,83 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { iteration, validationFeedback, maxInputTokens = 0, + specSummary, + skipPlanInstructions = false, } = opts; const totalTasks = taskInfo.total; const completedTasks = taskInfo.completed; const debugParts: string[] = []; let prompt: string; - - // No structured tasks — just pass the task as-is + let wasTrimmed = false; + + // Plan-related rules — omitted for fix/design passes where IMPLEMENTATION_PLAN.md is irrelevant + const planRules = skipPlanInstructions + ? '- This is a fix/review pass. Focus on the specific instructions in the task below.' + : `- Study IMPLEMENTATION_PLAN.md and work on ONE task at a time +- Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done +- Study specs/ directory for original requirements`; + + // Loop-aware preamble — gives the agent behavioral context per Ralph Playbook patterns + const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). + +Rules: +- IMPORTANT: The current working directory IS the project root. Create ALL files here — do NOT create a subdirectory for the project (e.g., do NOT run \`mkdir my-app\` or \`npx create-vite my-app\`). If you use a scaffolding tool, run it with \`.\` as the target (e.g., \`npm create vite@latest . -- --template react\`). 
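A worked example of the section budgeting added to `compressValidationFeedback` above (illustrative values; the exact truncation of body lines depends on the unchanged middle of the function):

```ts
// Illustrative only: three failing sections, tight character budget.
const feedback = [
  '### npm run lint',
  'src/a.ts:1:1 error no-unused-vars',
  '### npm run build',
  "error TS2307: Cannot find module './x'",
  '### npm test',
  '1 test failed',
].join('\n');

// With a small maxChars, only the first failing section survives and the
// rest collapse into a summary line, roughly:
//   ## Validation Failed
//   ### npm run lint
//   src/a.ts:1:1 error no-unused-vars
//   [2 more failing section(s) omitted]
const compressed = compressValidationFeedback(feedback, 120);
```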
+${planRules} +- Don't assume functionality is not already implemented — search the codebase first +- Implement completely — no placeholders or stubs +- Create files before importing them — never import components or modules that don't exist yet +- Do NOT run build or dev server commands yourself — the loop automatically runs lint checks between iterations and a full build on the final iteration. NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) — it blocks forever and wastes resources. (Exception: if explicitly told to do visual verification, you may briefly start a dev server and MUST kill it when done.) +${skipPlanInstructions ? '- Follow the completion instructions in the task below' : '- When ALL tasks are complete, explicitly state "All tasks completed"'} +- If you learn how to run/build the project, update AGENTS.md + +Technology gotchas (CRITICAL — follow these exactly): +- Tailwind CSS v4 (current version): The setup has changed significantly from v3. + * Install: \`npm install tailwindcss @tailwindcss/postcss postcss\` + * postcss.config.js must use: \`plugins: { '@tailwindcss/postcss': {} }\` (NOT \`tailwindcss\`) + * CSS file must use: \`@import "tailwindcss";\` (NOT \`@tailwind base/components/utilities\` — those are v3 directives) + * Do NOT create tailwind.config.js — Tailwind v4 uses CSS-based configuration +- JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). +- Do NOT run \`npm run build\` or \`npm run dev\` manually — the loop handles validation automatically (lint between tasks, full build at the end). + +Design quality (IMPORTANT): +- FIRST PRIORITY: If specs/ contains a design specification, follow it EXACTLY — match the described colors, spacing, layout, typography, and visual style faithfully. The spec is the source of truth. +- If no spec exists, choose ONE clear design direction (bold/minimal/retro/editorial/playful) and commit to it +- Use a specific color palette with max 3-4 colors, not rainbow gradients +- Avoid generic AI aesthetics: no purple-blue gradient backgrounds/text, no glass morphism/neumorphism, no Inter/Roboto defaults — pick distinctive typography (e.g. DM Sans, Playfair Display, Space Mono) +`; + + // Inject iteration log for iterations 2+ (gives agent memory of what happened before) + const iterationLogSection = + iteration > 1 && opts.iterationLog + ? `\n## Previous Iterations\n${opts.iterationLog}\nUse this history to avoid repeating failed approaches.\n` + : ''; + + // No structured tasks — pass the task with preamble if (!currentTask || totalTasks === 0) { - prompt = taskWithSkills; + if (iteration > 1) { + // Later iterations without structured tasks — remind agent to create a plan + prompt = `${preamble}${iterationLogSection} +Continue working on the project. +If you haven't already, create an IMPLEMENTATION_PLAN.md with structured tasks. +Study the specs/ directory for the original specification. 
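The Tailwind v4 gotchas in the preamble above reduce to two small files. A minimal sketch of what the preamble steers the agent toward (assumed file contents, restating the rules as written):

```ts
// postcss.config.js: Tailwind v4 registers through @tailwindcss/postcss,
// not the v3-era `tailwindcss` plugin key.
export default {
  plugins: { '@tailwindcss/postcss': {} },
};

// src/index.css then needs a single v4 import in place of the three
// v3 directives (@tailwind base/components/utilities):
//   @import "tailwindcss";
// And no tailwind.config.js: v4 configuration lives in CSS.
```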
+ +${taskWithSkills}`; + } else { + prompt = `${preamble}\n${taskWithSkills}`; + } if (validationFeedback) { const compressed = compressValidationFeedback(validationFeedback); prompt = `${prompt}\n\n${compressed}`; } debugParts.push('mode=raw (no structured tasks)'); } else if (iteration === 1) { - // Iteration 1: Full context — spec + skills + full current task details + // Iteration 1: Full context — preamble + spec + skills + full current task details const taskNum = completedTasks + 1; const subtasksList = currentTask.subtasks?.map((st) => `- [ ] ${st.name}`).join('\n') || ''; - prompt = `${taskWithSkills} + prompt = `${preamble} +${taskWithSkills} ## Current Task (${taskNum}/${totalTasks}): ${currentTask.name} @@ -166,13 +287,17 @@ ${subtasksList} Complete these subtasks, then mark them done in IMPLEMENTATION_PLAN.md by changing [ ] to [x].`; debugParts.push('mode=full (iteration 1)'); - debugParts.push(`included: full spec + skills + task ${taskNum}/${totalTasks}`); + debugParts.push(`included: preamble + full spec + skills + task ${taskNum}/${totalTasks}`); } else if (iteration <= 3) { - // Iterations 2-3: Trimmed plan context + abbreviated spec reference + // Iterations 2-3: Preamble + trimmed plan context + spec summary const planContext = buildTrimmedPlanContext(currentTask, taskInfo); + const specRef = specSummary + ? `\n## Spec Summary (reference — follow this faithfully)\n${specSummary}\n` + : '\nStudy specs/ for requirements if needed.'; - prompt = `Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. - + prompt = `${preamble}${iterationLogSection} +Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. +${specRef} ${planContext}`; // Add compressed validation feedback if present @@ -182,14 +307,19 @@ ${planContext}`; debugParts.push('included: compressed validation feedback'); } + wasTrimmed = true; debugParts.push(`mode=trimmed (iteration ${iteration})`); debugParts.push(`excluded: full spec, skills`); } else { - // Iterations 4+: Minimal context — just current task + // Iterations 4+: Preamble + minimal context + truncated spec hint const planContext = buildTrimmedPlanContext(currentTask, taskInfo); + const specHint = specSummary + ? `\nSpec key points:\n${specSummary.slice(0, 500)}${specSummary.length > 500 ? '\n[... see specs/ for full details ...]' : ''}\n` + : '\nSpecs in specs/.'; - prompt = `Continue working on the project. - + prompt = `${preamble}${iterationLogSection} +Continue working on the project. Check IMPLEMENTATION_PLAN.md for progress. 
+${specHint} ${planContext}`; // Add heavily compressed validation feedback if present @@ -199,19 +329,29 @@ ${planContext}`; debugParts.push('included: minimal validation feedback (500 chars)'); } + wasTrimmed = true; debugParts.push(`mode=minimal (iteration ${iteration})`); debugParts.push('excluded: spec, skills, plan history'); } // Apply token budget if set - let wasTrimmed = iteration > 1 && currentTask !== null && totalTasks > 0; const estimatedTokens = estimateTokens(prompt); if (maxInputTokens > 0 && estimatedTokens > maxInputTokens) { - // Aggressively trim: truncate the prompt to fit budget + // Semantic trimming: cut at paragraph/line boundaries instead of mid-instruction const targetChars = maxInputTokens * 3.5; // rough chars-per-token if (prompt.length > targetChars) { - prompt = `${prompt.slice(0, targetChars)}\n\n[Context truncated to fit ${maxInputTokens} token budget]`; + // Find the last paragraph break before the budget + let cutPoint = prompt.lastIndexOf('\n\n', targetChars); + if (cutPoint < targetChars * 0.5) { + // No paragraph break in the second half — fall back to last line break + cutPoint = prompt.lastIndexOf('\n', targetChars); + } + if (cutPoint < targetChars * 0.5) { + // No suitable break found — hard cut (rare edge case) + cutPoint = targetChars; + } + prompt = `${prompt.slice(0, cutPoint)}\n\n[Context truncated to fit ${maxInputTokens} token budget]`; wasTrimmed = true; debugParts.push(`truncated: ${estimatedTokens} -> ~${maxInputTokens} tokens`); } diff --git a/src/loop/cost-tracker.ts b/src/loop/cost-tracker.ts index 97700af..bf88555 100644 --- a/src/loop/cost-tracker.ts +++ b/src/loop/cost-tracker.ts @@ -92,6 +92,8 @@ export interface CostTrackerStats { export interface CostTrackerConfig { model: string; maxIterations?: number; + /** Maximum cost in USD before the loop should stop (0 = unlimited) */ + maxCost?: number; } /** @@ -359,6 +361,19 @@ ${stats.totalCacheSavings > 0 ? `| Cache Savings | ${formatCost(stats.totalCache `; } + /** + * Check if accumulated cost exceeds the configured budget. + * Returns the budget and current total if over, null otherwise. 
+ */ + isOverBudget(): { maxCost: number; currentCost: number } | null { + if (!this.config.maxCost || this.config.maxCost <= 0) return null; + const total = this.iterations.reduce((sum, i) => sum + i.cost.totalCost, 0); + if (total >= this.config.maxCost) { + return { maxCost: this.config.maxCost, currentCost: total }; + } + return null; + } + /** * Get the last iteration's cost */ diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..dcf79cd 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -1,3 +1,5 @@ +import { execSync } from 'node:child_process'; +import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs'; import { readdir, stat } from 'node:fs/promises'; import { join } from 'node:path'; import chalk from 'chalk'; @@ -7,8 +9,10 @@ import { formatPrBody, generateSemanticPrTitle, getCurrentBranch, + getHeadCommitHash, gitCommit, gitPush, + hasIterationChanges, hasUncommittedChanges, type IssueRef, type SemanticPrType, @@ -18,12 +22,11 @@ import { ProgressRenderer } from '../ui/progress-renderer.js'; import { displayRateLimitStats, parseRateLimitFromOutput, - type RateLimitInfo, type SessionContext, } from '../utils/rate-limit-display.js'; import { type Agent, type AgentRunOptions, runAgent } from './agents.js'; import { CircuitBreaker, type CircuitBreakerConfig } from './circuit-breaker.js'; -import { buildIterationContext, compressValidationFeedback } from './context-builder.js'; +import { buildIterationContext, buildSpecSummary } from './context-builder.js'; import { CostTracker, type CostTrackerStats, formatCost } from './cost-tracker.js'; import { estimateLoop, formatEstimateDetailed } from './estimator.js'; import { checkFileBasedCompletion, createProgressTracker, type ProgressEntry } from './progress.js'; @@ -31,11 +34,15 @@ import { RateLimiter } from './rate-limiter.js'; import { analyzeResponse, hasExitSignal } from './semantic-analyzer.js'; import { detectClaudeSkills, formatSkillsForPrompt } from './skills.js'; import { detectStepFromOutput } from './step-detector.js'; -import { getCurrentTask, parsePlanTasks } from './task-counter.js'; +import { getCurrentTask, MAX_ESTIMATED_ITERATIONS, parsePlanTasks } from './task-counter.js'; import { + detectBuildCommands, + detectLintCommands, detectValidationCommands, formatValidationFeedback, runAllValidations, + runBuildValidation, + runLintValidation, type ValidationResult, } from './validation.js'; @@ -140,6 +147,43 @@ async function getLatestMtime(dir: string): Promise { return latestMtime; } +/** + * Filesystem snapshot for git-independent change detection. + * Counts files and total bytes, skipping node_modules/.git/hidden dirs. 
+ */ +async function getFilesystemSnapshot( + dir: string +): Promise<{ fileCount: number; totalSize: number }> { + let fileCount = 0; + let totalSize = 0; + + async function walk(currentDir: string): Promise<void> { + try { + const entries = await readdir(currentDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.name.startsWith('.') || entry.name === 'node_modules') continue; + const fullPath = join(currentDir, entry.name); + try { + const stats = await stat(fullPath); + if (entry.isDirectory()) { + await walk(fullPath); + } else { + fileCount++; + totalSize += stats.size; + } + } catch { + // File may have been deleted during walk + } + } + } catch { + // Directory unreadable + } + } + + await walk(dir); + return { fileCount, totalSize }; +} + /** * Wait for filesystem to settle (no new writes) */ @@ -190,6 +234,13 @@ export interface LoopOptions { trackCost?: boolean; // Track token usage and cost model?: string; // Model name for cost estimation contextBudget?: number; // Max input tokens per iteration (0 = unlimited) + validationWarmup?: number; // Skip validation until N tasks completed (for greenfield builds) + maxCost?: number; // Maximum cost in USD before stopping (0 = unlimited) + agentTimeout?: number; // Agent timeout in milliseconds (default: 300000 = 5 min) + initialValidationFeedback?: string; // Pre-populate with errors (used by `fix` command) + maxSkills?: number; // Cap skills included in prompt (default: 5) + skipPlanInstructions?: boolean; // Skip IMPLEMENTATION_PLAN.md rules in preamble (fix --design) + fixMode?: 'design' | 'scan' | 'custom'; // Display mode for fix command headers } export interface LoopResult { @@ -203,7 +254,8 @@ export interface LoopResult { | 'max_iterations' | 'circuit_breaker' | 'rate_limit' - | 'file_signal'; + | 'file_signal' + | 'cost_ceiling'; stats?: { totalDuration: number; avgIterationDuration: number; @@ -235,112 +287,127 @@ interface CompletionOptions { minCompletionIndicators?: number; } -function detectCompletion( +/** + * Detect completion status AND reason in a single pass. + * Avoids running analyzeResponse() twice by combining detectCompletion + getCompletionReason. + */ +function detectCompletionWithReason( output: string, options: CompletionOptions = {} -): 'done' | 'blocked' | 'continue' { +): { status: 'done' | 'blocked' | 'continue'; reason: string } { const { completionPromise, requireExitSignal = false, minCompletionIndicators = 1 } = options; - // 1. Check explicit completion promise first (highest priority) + // --- Cheap checks first (string includes / simple regex) --- + + // 1. Explicit completion promise (highest priority) if (completionPromise && output.includes(completionPromise)) { - return 'done'; + return { status: 'done', reason: `Found completion promise: "${completionPromise}"` }; } - // 2. Check for COMPLETE tag + // 2. COMPLETE tag if (/<promise>COMPLETE<\/promise>/i.test(output)) { - return 'done'; + return { status: 'done', reason: 'Found COMPLETE marker' }; } - // 3. Use semantic analyzer for more nuanced detection - const analysis = analyzeResponse(output); - - // Check for blocked status - if (analysis.stuckScore >= 0.7 && analysis.confidence !== 'low') { - return 'blocked'; + // 3. Explicit EXIT_SIGNAL (cheap regex) + const hasExplicitSignal = hasExitSignal(output); + if (hasExplicitSignal && !requireExitSignal) { + return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; } - // Check blocked markers (legacy support) + // 4.
Legacy completion markers (cheap string search) const upperOutput = output.toUpperCase(); + if (!requireExitSignal) { + for (const marker of COMPLETION_MARKERS) { + if (upperOutput.includes(marker.toUpperCase())) { + return { status: 'done', reason: `Found completion marker: "${marker}"` }; + } + } + } + + // 5. Blocked markers (cheap string search) for (const marker of BLOCKED_MARKERS) { if (upperOutput.includes(marker.toUpperCase())) { - return 'blocked'; + return { status: 'blocked', reason: `Found blocked marker: "${marker}"` }; } } - // Check for explicit EXIT_SIGNAL - const hasExplicitSignal = hasExitSignal(output); + // --- Expensive check last (semantic analysis with many regex patterns) --- + + const analysis = analyzeResponse(output); - // If exit signal is required, check for it + if (analysis.stuckScore >= 0.7 && analysis.confidence !== 'low') { + return { status: 'blocked', reason: 'Semantic analysis detected stuck state' }; + } + + // When exit signal is required, validate it with semantic indicators if (requireExitSignal) { if (hasExplicitSignal && analysis.indicators.completion.length >= minCompletionIndicators) { - return 'done'; - } - // Continue if no explicit signal - if (!hasExplicitSignal) { - return 'continue'; + return { status: 'done', reason: 'Found EXIT_SIGNAL: true with completion indicators' }; } + return { status: 'continue', reason: '' }; } - // Check completion indicators + // Semantic completion detection (only reached when no explicit markers matched) if ( analysis.completionScore >= 0.7 && analysis.indicators.completion.length >= minCompletionIndicators ) { - return 'done'; - } - - // Explicit exit signals always count - if (hasExplicitSignal) { - return 'done'; - } - - // Legacy marker support - for (const marker of COMPLETION_MARKERS) { - if (upperOutput.includes(marker.toUpperCase())) { - return 'done'; - } + const indicators = analysis.indicators.completion.slice(0, 3); + return { + status: 'done', + reason: `Semantic analysis (${Math.round(analysis.completionScore * 100)}% confident): ${indicators.join(', ')}`, + }; } - return 'continue'; + return { status: 'continue', reason: '' }; } /** - * Get human-readable reason for completion (UX 3) + * Append an iteration summary to .ralph/iteration-log.md. + * Gives the agent inter-iteration memory without session continuity. */ -function getCompletionReason(output: string, options: CompletionOptions): string { - const { completionPromise } = options; - - // Check explicit completion promise first - if (completionPromise && output.includes(completionPromise)) { - return `Found completion promise: "${completionPromise}"`; +function appendIterationLog( + cwd: string, + iteration: number, + summary: string, + validationPassed: boolean, + hasChanges: boolean +): void { + try { + const ralphDir = join(cwd, '.ralph'); + if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + + const logPath = join(ralphDir, 'iteration-log.md'); + const entry = `## Iteration ${iteration} +- Status: ${validationPassed ? 'validation passed' : 'validation failed'} +- Changes: ${hasChanges ? 'yes' : 'no files changed'} +- Summary: ${summary.slice(0, 200)} +`; + appendFileSync(logPath, entry); + } catch { + // Non-critical — don't break the loop if we can't write the log } +} - // Check for COMPLETE tag - if (/<promise>COMPLETE<\/promise>/i.test(output)) { - return 'Found COMPLETE marker'; - } +/** + * Read the last N iteration summaries from .ralph/iteration-log.md.
+ * Used by context-builder to give the agent memory of previous iterations. + */ +export function readIterationLog(cwd: string, maxEntries = 3): string | undefined { + try { + const logPath = join(cwd, '.ralph', 'iteration-log.md'); + if (!existsSync(logPath)) return undefined; - // Check for explicit EXIT_SIGNAL - if (hasExitSignal(output)) { - return 'Found EXIT_SIGNAL: true'; - } + const content = readFileSync(logPath, 'utf-8'); + const entries = content.split(/^## Iteration /m).filter((e) => e.trim()); + if (entries.length === 0) return undefined; - // Check completion markers - const upperOutput = output.toUpperCase(); - for (const marker of COMPLETION_MARKERS) { - if (upperOutput.includes(marker.toUpperCase())) { - return `Found completion marker: "${marker}"`; - } - } - - // Use semantic analysis - const analysis = analyzeResponse(output); - if (analysis.completionScore >= 0.7) { - const indicators = analysis.indicators.completion.slice(0, 3); - return `Semantic analysis (${Math.round(analysis.completionScore * 100)}% confident): ${indicators.join(', ')}`; + const recent = entries.slice(-maxEntries).map((e) => `## Iteration ${e}`); + return recent.join('\n'); + } catch { + return undefined; } - - return 'Task marked as complete by agent'; } function summarizeChanges(output: string): string { @@ -391,12 +458,13 @@ function summarizeChanges(output: string): string { export async function runLoop(options: LoopOptions): Promise { const spinner = ora(); - const maxIterations = options.maxIterations || 50; + let maxIterations = options.maxIterations || 50; const commits: string[] = []; const startTime = Date.now(); let validationFailures = 0; let exitReason: LoopResult['exitReason'] = 'max_iterations'; let finalIteration = maxIterations; + let consecutiveIdleIterations = 0; // Initialize circuit breaker const circuitBreaker = new CircuitBreaker(options.circuitBreaker); @@ -416,20 +484,34 @@ export async function runLoop(options: LoopOptions): Promise { ? new CostTracker({ model: options.model || 'claude-3-sonnet', maxIterations: maxIterations, + maxCost: options.maxCost, }) : null; // Detect validation commands if validation is enabled const validationCommands = options.validate ? 
detectValidationCommands(options.cwd) : []; - // Detect Claude Code skills + // Always-on build validation (not gated by --validate flag) + // Re-detected inside the loop for greenfield projects where package.json appears mid-loop + let buildCommands = detectBuildCommands(options.cwd); + // Lightweight lint for intermediate iterations (build only runs on final iteration) + let lintCommands = detectLintCommands(options.cwd); + + // Detect Claude Code skills (capped by maxSkills option) const detectedSkills = detectClaudeSkills(options.cwd); let taskWithSkills = options.task; if (detectedSkills.length > 0) { - const skillsPrompt = formatSkillsForPrompt(detectedSkills, options.task); + const skillsPrompt = formatSkillsForPrompt(detectedSkills, options.task, options.maxSkills); taskWithSkills = `${options.task}\n\n${skillsPrompt}`; } + // Build abbreviated spec summary for context builder (iterations 2+) + const specSummary = buildSpecSummary(options.cwd); + + // Track validation feedback separately — don't mutate taskWithSkills + // initialValidationFeedback lets the `fix` command pre-populate errors for iteration 1 + let lastValidationFeedback = options.initialValidationFeedback || ''; + // Completion detection options const completionOptions: CompletionOptions = { completionPromise: options.completionPromise, @@ -454,7 +536,12 @@ export async function runLoop(options: LoopOptions): Promise { startupLines.push(` Auto-commit: ${chalk.green('enabled')}`); } if (detectedSkills.length > 0) { - startupLines.push(` Skills: ${chalk.white(`${detectedSkills.length} detected`)}`); + const effectiveSkills = options.maxSkills + ? Math.min(detectedSkills.length, options.maxSkills) + : Math.min(detectedSkills.length, 5); + startupLines.push( + ` Skills: ${chalk.white(`${effectiveSkills} active (${detectedSkills.length} installed)`)}` + ); } if (rateLimiter) { startupLines.push(` Rate limit: ${chalk.white(`${options.rateLimit}/hour`)}`); @@ -486,6 +573,15 @@ export async function runLoop(options: LoopOptions): Promise { // Track completed tasks to show progress diff between iterations let previousCompletedTasks = initialTaskCount.completed; + let previousTotalTasks = initialTaskCount.total; + + // Track whether we've already extended the loop for build-fix retries + // When the build fails on the "final" iteration, we grant 2 extra iterations to fix it (once) + let buildFixExtended = false; + const BUILD_FIX_EXTRA_ITERATIONS = 2; + + // Filesystem snapshot for git-independent change detection + let previousSnapshot = await getFilesystemSnapshot(options.cwd); for (let i = 1; i <= maxIterations; i++) { const iterationStart = Date.now(); @@ -531,7 +627,8 @@ export async function runLoop(options: LoopOptions): Promise { } // Check for file-based completion signals - if (options.checkFileCompletion) { + // Skip if validation just failed — the agent needs a chance to fix build errors first + if (options.checkFileCompletion && !lastValidationFeedback) { const fileCompletion = await checkFileBasedCompletion(options.cwd); if (fileCompletion.completed) { spinner.succeed(chalk.green(`File-based completion: ${fileCompletion.reason}`)); @@ -541,6 +638,21 @@ export async function runLoop(options: LoopOptions): Promise { } } + // Check cost ceiling before starting iteration + if (costTracker) { + const overBudget = costTracker.isOverBudget(); + if (overBudget) { + console.log( + chalk.red( + `\n Cost ceiling reached: ${formatCost(overBudget.currentCost)} >= ${formatCost(overBudget.maxCost)} budget` + ) + ); + 
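The pre-iteration check above is the consumer of `CostTracker.isOverBudget()` from cost-tracker.ts earlier in this diff. A minimal sketch of the wiring, with `runIteration` as a hypothetical stand-in for the agent call:

```ts
// Hypothetical sketch: stop a loop once the tracker reports the budget exceeded.
const tracker = new CostTracker({ model: 'claude-3-sonnet', maxCost: 5 });

for (let i = 1; i <= 10; i++) {
  const over = tracker.isOverBudget();
  if (over) {
    console.log(`Cost ceiling: $${over.currentCost.toFixed(4)} >= $${over.maxCost}`);
    break;
  }
  const output = await runIteration(i); // stand-in, not a real API
  tracker.recordIteration('prompt text', output); // (input, output) per the tests
}
```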
finalIteration = i - 1; + exitReason = 'cost_ceiling'; + break; + } + } + // Log iteration warnings const progressPercent = (i / maxIterations) * 100; if (progressPercent >= 90 && progressPercent < 95) { @@ -569,6 +681,9 @@ export async function runLoop(options: LoopOptions): Promise { // Check if tasks were completed since last iteration const newlyCompleted = completedTasks - previousCompletedTasks; if (newlyCompleted > 0 && i > 1) { + // Task completion is forward progress — reset circuit breaker consecutive failures + circuitBreaker.recordSuccess(); + // Get names of newly completed tasks (strip markdown) const maxNameWidth = Math.max(30, getTerminalWidth() - 30); const completedNames = taskInfo.tasks @@ -583,6 +698,26 @@ export async function runLoop(options: LoopOptions): Promise { } previousCompletedTasks = completedTasks; + // Dynamic iteration budget: if agent expanded the plan (added more tasks), + // recalculate maxIterations so we don't run out mid-project + if (totalTasks > previousTotalTasks && totalTasks > 0) { + const buffer = Math.max(3, Math.ceil(totalTasks * 0.3)); + const newMax = Math.min( + MAX_ESTIMATED_ITERATIONS, + Math.max(maxIterations, totalTasks + buffer) + ); + if (newMax > maxIterations) { + console.log( + chalk.dim( + ` Adjusting iterations: ${maxIterations} → ${newMax} (plan expanded to ${totalTasks} tasks)` + ) + ); + maxIterations = newMax; + finalIteration = maxIterations; + } + previousTotalTasks = totalTasks; + } + // Show loop header with task info const sourceIcon = getSourceIcon(options.sourceType); const headerLines: string[] = []; @@ -603,11 +738,26 @@ export async function runLoop(options: LoopOptions): Promise { chalk.dim(truncateToFit(` ${options.agent.name} │ Iter ${i}/${maxIterations}`, innerWidth)) ); } else { - const fallbackLine = ` ${sourceIcon} Loop ${i}/${maxIterations} │ Running ${options.agent.name}`; + const modeLabel = + options.fixMode === 'design' + ? 'Design Fix' + : options.fixMode + ? 'Fix' + : `Running ${options.agent.name}`; + const fallbackLine = ` ${sourceIcon} Loop ${i}/${maxIterations} │ ${modeLabel}`; headerLines.push(chalk.white.bold(truncateToFit(fallbackLine, innerWidth))); } console.log(); console.log(drawBox(headerLines, { color: chalk.cyan, width: boxWidth })); + + // Show subtask tree if current task has subtasks + if (currentTask?.subtasks && currentTask.subtasks.length > 0) { + for (const st of currentTask.subtasks) { + const icon = st.completed ? chalk.green(' [x]') : chalk.dim(' [ ]'); + const name = truncateToFit(cleanTaskName(st.name), innerWidth - 8); + console.log(`${icon} ${chalk.dim(name)}`); + } + } console.log(); // Create progress renderer for this iteration @@ -616,6 +766,9 @@ export async function runLoop(options: LoopOptions): Promise { iterProgress.updateProgress(i, maxIterations, costTracker?.getStats()?.totalCost?.totalCost); // Build iteration-specific task with smart context windowing + // Read iteration log for inter-iteration memory (iterations 2+) + const iterationLog = i > 1 ? 
readIterationLog(options.cwd) : undefined; + const builtContext = buildIterationContext({ fullTask: options.task, taskWithSkills, @@ -623,8 +776,11 @@ export async function runLoop(options: LoopOptions): Promise { taskInfo, iteration: i, maxIterations, - validationFeedback: undefined, // Validation feedback handled separately below + validationFeedback: lastValidationFeedback || undefined, maxInputTokens: options.contextBudget || 0, + specSummary, + skipPlanInstructions: options.skipPlanInstructions, + iterationLog, }); const iterationTask = builtContext.prompt; @@ -647,12 +803,16 @@ export async function runLoop(options: LoopOptions): Promise { // Run the agent with step detection (include skills in task) // NOTE: Don't use maxTurns - it can cause issues. Let agent complete naturally. + // Snapshot HEAD before agent runs — used to detect commits made during iteration + const iterationStartHash = await getHeadCommitHash(options.cwd); + const agentOptions: AgentRunOptions = { task: iterationTask, cwd: options.cwd, auto: options.auto, // maxTurns removed - was causing issues, match wizard behavior streamOutput: !!process.env.RALPH_DEBUG, // Show raw JSON when debugging + timeoutMs: options.agentTimeout, onOutput: (line: string) => { const step = detectStepFromOutput(line); if (step) { @@ -680,31 +840,110 @@ export async function runLoop(options: LoopOptions): Promise { iterProgress.stop('Iteration complete'); + // Kill orphaned dev servers after design iterations (agent may crash without cleanup) + if (options.fixMode === 'design') { + try { + const pids = execSync('lsof -ti :3000,:5173,:4321,:8080 2>/dev/null', { + encoding: 'utf-8', + timeout: 3000, + }).trim(); + if (pids) { + for (const pid of pids.split('\n').filter(Boolean)) { + try { + process.kill(Number(pid), 'SIGTERM'); + } catch { + /* already dead */ + } + } + } + } catch { + /* no processes on those ports — normal */ + } + } + // Track cost for this iteration (silent - summary shown at end) if (costTracker) { costTracker.recordIteration(options.task, result.output); + + // Post-iteration cost ceiling check — prevent starting another expensive iteration + const overBudget = costTracker.isOverBudget(); + if (overBudget) { + console.log( + chalk.red( + `\n Cost ceiling reached after iteration ${i}: ${formatCost(overBudget.currentCost)} >= ${formatCost(overBudget.maxCost)} budget` + ) + ); + finalIteration = i; + exitReason = 'cost_ceiling'; + break; + } } - // Check for completion using enhanced detection - let status = detectCompletion(result.output, completionOptions); + // Check for completion using enhanced detection (single-pass: status + reason) + const completionResult = detectCompletionWithReason(result.output, completionOptions); + let status = completionResult.status; + + // Track file changes between iterations for stall detection + // Primary: filesystem snapshot (works without git) + // Secondary: git-based detection (catches committed changes when git available) + const currentSnapshot = await getFilesystemSnapshot(options.cwd); + const fsChanged = + currentSnapshot.fileCount !== previousSnapshot.fileCount || + currentSnapshot.totalSize !== previousSnapshot.totalSize; + const gitChanged = await hasIterationChanges(options.cwd, iterationStartHash); + const hasChanges = fsChanged || gitChanged; + previousSnapshot = currentSnapshot; + + // Task-aware stall detection: check both file changes AND task progress + // Re-parse tasks after agent runs to catch newly completed tasks + const postIterationTaskInfo = 
parsePlanTasks(options.cwd); + const tasksProgressedThisIteration = postIterationTaskInfo.completed > previousCompletedTasks; + // Build/validation failures are NOT idle — agent is actively debugging + const hadValidationFailure = !!lastValidationFeedback; + // Design mode: screenshot analysis is productive even without file changes + const outputLower = result.output.toLowerCase(); + const hasDesignActivity = + options.fixMode === 'design' && + (outputLower.includes('screenshot') || outputLower.includes('viewport')); + const hasProductiveProgress = + hasChanges || tasksProgressedThisIteration || hadValidationFailure || hasDesignActivity; + + if (!hasProductiveProgress) { + consecutiveIdleIterations++; + } else { + consecutiveIdleIterations = 0; + } // Verify completion - check if files were actually changed - if (status === 'done' && i === 1) { - // On first iteration, verify that files were actually created/modified - const hasChanges = await hasUncommittedChanges(options.cwd); - if (!hasChanges) { + if (status === 'done' && !hasChanges) { + if (i === 1) { console.log(chalk.yellow(' Agent reported done but no files changed - continuing...')); status = 'continue'; - } else { - // Wait for filesystem to settle before declaring done - await waitForFilesystemQuiescence(options.cwd, 2000); } + // On later iterations, allow done if agent genuinely finished (no more work to do) + } else if (status === 'done' && hasChanges) { + // Wait for filesystem to settle before declaring done + await waitForFilesystemQuiescence(options.cwd, 2000); } - // In build mode, don't allow completion while plan tasks remain - if (status === 'done' && options.task.includes('IMPLEMENTATION_PLAN.md')) { + // Stall detection: stop if no productive progress for consecutive iterations + // More lenient for larger projects (5+ tasks) which need more iterations for scaffolding + const staleThreshold = taskInfo.total > 5 ? 
4 : 3; + if (consecutiveIdleIterations >= staleThreshold && i > 3) { + console.log( + chalk.yellow( + ` No progress for ${consecutiveIdleIterations} consecutive iterations - stopping` + ) + ); + finalIteration = i; + exitReason = 'completed'; + break; + } + + // Don't allow completion while plan tasks remain (check plan file if it exists) + if (status === 'done') { const latestTaskInfo = parsePlanTasks(options.cwd); - if (latestTaskInfo.pending > 0) { + if (latestTaskInfo.total > 0 && latestTaskInfo.pending > 0) { console.log( chalk.yellow( ` Agent reported done but ${latestTaskInfo.pending} task(s) remain - continuing...` @@ -797,18 +1036,136 @@ export async function runLoop(options: LoopOptions): Promise { }; } - // Run validation (backpressure) if enabled and there are changes - let _validationPassed = true; + // --- Tiered validation: lint on intermediate iterations, build on final --- + // Re-detect commands if none found yet (greenfield: package.json may appear mid-loop) + if (buildCommands.length === 0) { + buildCommands = detectBuildCommands(options.cwd); + if (buildCommands.length > 0 && process.env.RALPH_DEBUG) { + console.error( + `[DEBUG] Build commands detected: ${buildCommands.map((c) => c.name).join(', ')}` + ); + } + } + if (lintCommands.length === 0) { + lintCommands = detectLintCommands(options.cwd); + } + + const buildCoveredByFullValidation = + options.validate && + validationCommands.some((vc) => vc.name === 'build' || vc.name === 'typecheck'); + + // Determine if this is a "final" iteration where the full build should run: + // - Last allowed iteration, OR all plan tasks are complete + const preValidationTaskInfo = parsePlanTasks(options.cwd); + const isFinalIteration = i === maxIterations || preValidationTaskInfo.pending === 0; + + if (!buildCoveredByFullValidation && i > 1) { + const checkResults: ValidationResult[] = []; + let checkLabel = ''; + + if (isFinalIteration && buildCommands.length > 0) { + // Final iteration: run full build validation (catches compile errors) + checkLabel = 'build'; + spinner.start(chalk.yellow(`Loop ${i}: Running build check...`)); + for (const cmd of buildCommands) { + checkResults.push(await runBuildValidation(options.cwd, cmd)); + } + } else if (!isFinalIteration && lintCommands.length > 0) { + // Intermediate iteration: run lightweight lint check (fast feedback) + checkLabel = 'lint'; + spinner.start(chalk.yellow(`Loop ${i}: Running lint check...`)); + for (const cmd of lintCommands) { + checkResults.push(await runLintValidation(options.cwd, cmd)); + } + } + + if (checkResults.length > 0) { + const allPassed = checkResults.every((r) => r.success); + + if (!allPassed) { + validationFailures++; + const feedback = formatValidationFeedback(checkResults); + spinner.fail( + chalk.red(`Loop ${i}: ${checkLabel === 'build' ? 'Build' : 'Lint'} check failed`) + ); + + const failedSummaries: string[] = []; + for (const vr of checkResults) { + if (!vr.success) { + const errorText = vr.error || vr.output || ''; + const errorCount = (errorText.match(/error/gi) || []).length; + const hint = errorCount > 0 ? 
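/*
 * Illustrative failure summary assembled just below (commands invented; the
 * format follows the `join(' │ ')` in the surrounding code):
 *   ✗ npm run lint (12 errors) │ npx tsc --noEmit (failed)
 */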
`${errorCount} errors` : 'failed'; + failedSummaries.push(`${vr.command} (${hint})`); + } + } + console.log(chalk.red(` ✗ ${failedSummaries.join(' │ ')}`)); + + const errorMsg = checkResults + .filter((r) => !r.success) + .map((r) => r.error?.slice(0, 200) || r.output?.slice(0, 200) || r.command) + .join('\n'); + const tripped = circuitBreaker.recordFailure(errorMsg); + + if (tripped) { + const reason = circuitBreaker.getTripReason(); + console.log(chalk.red(`Circuit breaker tripped: ${reason}`)); + if (progressTracker && progressEntry) { + progressEntry.status = 'failed'; + progressEntry.summary = `Circuit breaker tripped (${checkLabel}): ${reason}`; + progressEntry.validationResults = checkResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + finalIteration = i; + exitReason = 'circuit_breaker'; + break; + } + + if (progressTracker && progressEntry) { + progressEntry.status = 'validation_failed'; + progressEntry.summary = `${checkLabel === 'build' ? 'Build' : 'Lint'} check failed`; + progressEntry.validationResults = checkResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + + // If build failed on the final iteration, extend the loop to let the agent fix it + if (checkLabel === 'build' && isFinalIteration && !buildFixExtended) { + const newMax = maxIterations + BUILD_FIX_EXTRA_ITERATIONS; + console.log( + chalk.yellow( + ` Extending loop by ${BUILD_FIX_EXTRA_ITERATIONS} iterations to fix build errors (${maxIterations} → ${newMax})` + ) + ); + maxIterations = newMax; + finalIteration = maxIterations; + buildFixExtended = true; + } + + lastValidationFeedback = feedback; + continue; + } + spinner.succeed( + chalk.green(`Loop ${i}: ${checkLabel === 'build' ? 'Build' : 'Lint'} check passed`) + ); + circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; + } + } + + // Run full validation (backpressure) if enabled and there are changes + // Skip validation during warm-up period (greenfield builds where early tasks can't pass tests) let validationResults: ValidationResult[] = []; + const warmupThreshold = options.validationWarmup ?? 
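/*
 * Shape of the circuit breaker consumed above, inferred from its call sites;
 * the real interface may differ:
 *
 *   interface CircuitBreaker {
 *     recordFailure(error: string): boolean; // true once the trip threshold is reached
 *     recordSuccess(): void;                 // resets the consecutive-failure streak
 *     getTripReason(): string | undefined;
 *   }
 */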
0; + const pastWarmup = completedTasks >= warmupThreshold; - if (validationCommands.length > 0 && (await hasUncommittedChanges(options.cwd))) { + if (validationCommands.length > 0 && pastWarmup && i > 1) { spinner.start(chalk.yellow(`Loop ${i}: Running validation...`)); validationResults = await runAllValidations(options.cwd, validationCommands); const allPassed = validationResults.every((r) => r.success); if (!allPassed) { - _validationPassed = false; validationFailures++; const feedback = formatValidationFeedback(validationResults); spinner.fail(chalk.red(`Loop ${i}: Validation failed`)); @@ -863,14 +1220,15 @@ export async function runLoop(options: LoopOptions): Promise { await progressTracker.appendEntry(progressEntry); } - // Continue loop with compressed validation feedback - const compressedFeedback = compressValidationFeedback(feedback); - taskWithSkills = `${taskWithSkills}\n\n${compressedFeedback}`; + // Pass validation feedback to context builder for next iteration + // (don't mutate taskWithSkills — that defeats context trimming) + lastValidationFeedback = feedback; continue; // Go to next iteration to fix issues } else { - // Validation passed - record success + // Validation passed - record success and clear feedback spinner.succeed(chalk.green(`Loop ${i}: Validation passed`)); circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; } } @@ -905,7 +1263,7 @@ export async function runLoop(options: LoopOptions): Promise { // Update progress entry if (progressTracker && progressEntry) { - progressEntry.status = status === 'done' ? 'completed' : 'completed'; + progressEntry.status = status === 'done' ? 'completed' : 'partial'; progressEntry.summary = summarizeChanges(result.output); progressEntry.validationResults = validationResults.length > 0 ? 
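/*
 * `appendIterationLog` (used a few lines below) and `readIterationLog` are not
 * shown in this diff. A sketch matching their call sites; the file name and
 * line format here are assumptions:
 *
 *   import { appendFileSync } from 'node:fs';
 *   import { join } from 'node:path';
 *
 *   function appendIterationLog(
 *     cwd: string,
 *     iteration: number,
 *     summary: string,
 *     validationPassed: boolean,
 *     hasChanges: boolean
 *   ): void {
 *     const line = `[iter ${iteration}] ${summary} | validation=${validationPassed ? 'pass' : 'fail'} | changes=${hasChanges}\n`;
 *     appendFileSync(join(cwd, '.ralph-iterations.log'), line);
 *   }
 */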
validationResults : undefined;
@@ -922,8 +1280,13 @@
       await progressTracker.appendEntry(progressEntry);
     }

+    // Write iteration summary for inter-iteration memory
+    const iterSummary = summarizeChanges(result.output);
+    const iterValidationPassed = validationResults.every((r) => r.success);
+    appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges);
+
     if (status === 'done') {
-      const completionReason = getCompletionReason(result.output, completionOptions);
+      const completionReason = completionResult.reason || 'Task marked as complete by agent';
       const duration = Date.now() - startTime;
       const minutes = Math.floor(duration / 60000);
       const seconds = Math.floor((duration % 60000) / 1000);
@@ -960,9 +1323,6 @@
         `Iter ${i}/${maxIterations}${taskLabel}${costLabel} │ ${elapsedMin}m ${elapsedSec}s`
       )
     );
-
-    // Small delay between iterations
-    await new Promise((resolve) => setTimeout(resolve, 1000));
   }

   // Post-loop actions
diff --git a/src/loop/progress.ts b/src/loop/progress.ts
index c0d6f04..e00e04a 100644
--- a/src/loop/progress.ts
+++ b/src/loop/progress.ts
@@ -6,7 +6,7 @@ import type { ValidationResult } from './validation.js';
 export interface ProgressEntry {
   timestamp: string;
   iteration: number;
-  status: 'started' | 'completed' | 'failed' | 'blocked' | 'validation_failed';
+  status: 'started' | 'completed' | 'partial' | 'failed' | 'blocked' | 'validation_failed';
   summary: string;
   validationResults?: ValidationResult[];
   commitHash?: string;
@@ -86,6 +86,8 @@ function getStatusBadge(status: ProgressEntry['status']): string {
       return '🔄 Started';
     case 'completed':
       return '✅ Completed';
+    case 'partial':
+      return '🔶 Partial';
     case 'failed':
       return '❌ Failed';
     case 'blocked':
diff --git a/src/loop/session.ts b/src/loop/session.ts
index be8429a..3716ab8 100644
--- a/src/loop/session.ts
+++ b/src/loop/session.ts
@@ -66,6 +66,8 @@ export interface SessionState {
   pauseReason?: string;
   /** Error message (if failed) */
   error?: string;
+  /** Last validation feedback (preserved for resume) */
+  lastValidationFeedback?: string;
   /** Exit reason */
   exitReason?:
     | 'completed'
@@ -197,7 +199,11 @@ export async function updateSessionIteration(
 /**
  * Pause the current session
  */
-export async function pauseSession(cwd: string, reason?: string): Promise<SessionState | null> {
+export async function pauseSession(
+  cwd: string,
+  reason?: string,
+  validationFeedback?: string
+): Promise<SessionState | null> {
   const session = await loadSession(cwd);
   if (!session) return null;

@@ -205,6 +211,7 @@ export async function pauseSession(cwd: string, reason?: string): Promise<SessionState | null> {
diff --git a/src/loop/skills.ts b/src/loop/skills.ts
--- a/src/loop/skills.ts
+++ b/src/loop/skills.ts
@@ ... @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean {
+  const taskIsWeb = WEB_TASK_KEYWORDS.some((kw) => taskLower.includes(kw));

   const isDesignSkill =
     text.includes('design') ||
@@ -273,24 +312,40 @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean {
   return false;
 }

-export function formatSkillsForPrompt(skills: ClaudeSkill[], task?: string): string {
+export function formatSkillsForPrompt(
+  skills: ClaudeSkill[],
+  task?: string,
+  maxSkills?: number
+): string {
   if (skills.length === 0) return '';

+  const MAX_SKILLS_IN_PROMPT = maxSkills || 5;
+
+  // When we have a task, only include relevant skills to avoid prompt bloat
+  let selected: ClaudeSkill[];
+  if (task) {
+    const relevant = skills.filter((skill) => shouldAutoApplySkill(skill, task));
+    selected =
+      relevant.length > 0 ? 
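/*
 * Effect of the selection above (illustrative): with 12 installed skills of
 * which 2 match the task, the prompt lists only those 2; with 0 matches it
 * falls back to the first MAX_SKILLS_IN_PROMPT (default 5) installed skills.
 */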
relevant.slice(0, MAX_SKILLS_IN_PROMPT) + : skills.slice(0, MAX_SKILLS_IN_PROMPT); + } else { + selected = skills.slice(0, MAX_SKILLS_IN_PROMPT); + } + const lines = ['## Available Claude Code Skills', '']; - for (const skill of skills) { + for (const skill of selected) { lines.push(`- **${skill.name}**: ${skill.description || 'No description'}`); } lines.push(''); if (task) { - const autoApply = skills.filter((skill) => shouldAutoApplySkill(skill, task)); - if (autoApply.length > 0) { - const skillList = autoApply.map((skill) => `/${skill.name}`).join(', '); - lines.push(`Auto-apply these skills: ${skillList}`); - lines.push(''); - } + // All selected skills are already relevant — tell the agent to apply them + const skillList = selected.map((skill) => `/${skill.name}`).join(', '); + lines.push(`Auto-apply these skills: ${skillList}`); + lines.push(''); } lines.push('Use these skills when appropriate by invoking them with /skill-name.'); diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index ff928d6..29898e5 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -1,6 +1,25 @@ -import { existsSync, readFileSync } from 'node:fs'; +import { readFileSync, statSync } from 'node:fs'; import { join } from 'node:path'; +/** Maximum iterations for estimated calculations */ +export const MAX_ESTIMATED_ITERATIONS = 25; + +/** Mtime-based cache for parsePlanTasks to avoid redundant file reads within the same iteration */ +let _planCache: { path: string; mtimeMs: number; result: TaskCount } | null = null; + +/** Deep-clone a TaskCount to prevent cache mutation by consumers */ +function cloneTaskCount(tc: TaskCount): TaskCount { + return { + total: tc.total, + completed: tc.completed, + pending: tc.pending, + tasks: tc.tasks.map((t) => ({ + ...t, + subtasks: t.subtasks?.map((st) => ({ ...st })), + })), + }; +} + export interface PlanTask { name: string; completed: boolean; @@ -23,8 +42,15 @@ export interface TaskCount { export function parsePlanTasks(cwd: string): TaskCount { const planPath = join(cwd, 'IMPLEMENTATION_PLAN.md'); - if (!existsSync(planPath)) { - return { total: 0, completed: 0, pending: 0, tasks: [] }; + // Return cached result if file hasn't changed (avoids redundant reads within same iteration) + let preMtime = 0; + try { + preMtime = statSync(planPath).mtimeMs; + if (_planCache && _planCache.path === planPath && _planCache.mtimeMs === preMtime) { + return cloneTaskCount(_planCache.result); + } + } catch { + // stat failed (file may not exist) — fall through to read attempt } try { @@ -109,13 +135,26 @@ export function parsePlanTasks(cwd: string): TaskCount { const completed = tasks.filter((t) => t.completed).length; const pending = tasks.filter((t) => !t.completed).length; - return { + const result: TaskCount = { total: tasks.length, completed, pending, tasks, }; + + // Cache result only if file wasn't modified during parsing (double-stat guard) + try { + const postMtime = statSync(planPath).mtimeMs; + if (postMtime === preMtime) { + _planCache = { path: planPath, mtimeMs: postMtime, result }; + } + } catch { + // stat failed — skip caching + } + + return result; } catch { + _planCache = null; return { total: 0, completed: 0, pending: 0, tasks: [] }; } } @@ -136,29 +175,78 @@ export function getTaskByIndex(cwd: string, index: number): PlanTask | null { return tasks[index] || null; } +/** + * Estimate task complexity from spec/task content when no plan file exists. 
+ * Counts structural elements (headings, bullet points, numbered items) + * and maps them to an estimated task count. + */ +export function estimateTasksFromContent(content: string): { estimated: number; reason: string } { + if (!content || content.length < 20) { + return { estimated: 0, reason: 'no content' }; + } + + const lines = content.split('\n'); + + // Count structural signals + const headings = lines.filter((l) => /^#{1,4}\s+/.test(l)).length; + const bullets = lines.filter((l) => /^\s*[-*]\s+/.test(l)).length; + const numbered = lines.filter((l) => /^\s*\d+[.)]\s+/.test(l)).length; + const checkboxes = lines.filter((l) => /^\s*[-*]\s*\[[ xX]\]/.test(l)).length; + + // If there are explicit checkboxes, use that count + if (checkboxes > 0) { + return { estimated: checkboxes, reason: `${checkboxes} checkboxes in spec` }; + } + + // Estimate from structural elements: headings define major tasks, + // dense bullet lists suggest subtasks within those + const majorTasks = Math.max(1, headings); + const detailItems = bullets + numbered; + + // Heuristic: ~4 detail items per iteration of work + const fromDetails = Math.ceil(detailItems / 4); + const estimated = Math.max(majorTasks, fromDetails, 1); + + return { + estimated, + reason: `estimated from spec (${headings} sections, ${bullets + numbered} items)`, + }; +} + /** * Calculate optimal number of loop iterations based on task count * * Formula: - * - If tasks exist: pendingTasks + buffer (for retries/validation fixes) + * - If plan exists: pendingTasks + buffer (for retries/validation fixes) * - Buffer = max(2, pendingTasks * 0.3) - at least 2, or 30% extra for retries + * - If no plan but spec content: estimate from spec structure * - Minimum: 3 (even for small tasks) * - Maximum: 25 (prevent runaway loops) - * - If no plan: 10 (sensible default) */ -export function calculateOptimalIterations(cwd: string): { +export function calculateOptimalIterations( + cwd: string, + taskContent?: string +): { iterations: number; taskCount: TaskCount; reason: string; } { const taskCount = parsePlanTasks(cwd); - // No implementation plan - use default + // No implementation plan - estimate from spec content if available if (taskCount.total === 0) { + const estimate = taskContent ? 
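/*
 * Worked example of estimateTasksFromContent, following the code above: a spec
 * with 3 "#" headings, 10 bullets and 2 numbered items, and no checkboxes gives
 *   majorTasks  = max(1, 3)          = 3
 *   fromDetails = ceil((10 + 2) / 4) = 3
 *   estimated   = max(3, 3, 1)       = 3
 * so the result is { estimated: 3, reason: 'estimated from spec (3 sections, 12 items)' }.
 */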
estimateTasksFromContent(taskContent) : null;
+    if (estimate && estimate.estimated > 0) {
+      const buffer = Math.max(3, Math.ceil(estimate.estimated * 0.3));
+      let iterations = estimate.estimated + buffer;
+      iterations = Math.max(5, iterations);
+      iterations = Math.min(15, iterations);
+      return { iterations, taskCount, reason: estimate.reason };
+    }
     return {
       iterations: 10,
       taskCount,
-      reason: 'No implementation plan found, using default',
+      reason: 'No plan or spec structure found, using default',
     };
   }
@@ -171,15 +259,15 @@ export function calculateOptimalIterations(cwd: string): {
     };
   }

-  // Calculate buffer (at least 2, or 30% of pending tasks for retries)
-  const buffer = Math.max(2, Math.ceil(taskCount.pending * 0.3));
+  // Calculate buffer (at least 3, or 30% of pending tasks for retries)
+  const buffer = Math.max(3, Math.ceil(taskCount.pending * 0.3));

   // Calculate iterations: pending tasks + buffer
   let iterations = taskCount.pending + buffer;

   // Apply bounds
-  iterations = Math.max(3, iterations); // Minimum 3
-  iterations = Math.min(25, iterations); // Maximum 25
+  iterations = Math.max(5, iterations); // Minimum 5
+  iterations = Math.min(MAX_ESTIMATED_ITERATIONS, iterations);

   return {
     iterations,
diff --git a/src/loop/task-executor.ts b/src/loop/task-executor.ts
index 1e88029..ed3db31 100644
--- a/src/loop/task-executor.ts
+++ b/src/loop/task-executor.ts
@@ -140,7 +140,9 @@ export async function executeTaskBatch(options: TaskExecutionOptions): Promise {
diff --git a/src/loop/validation.ts b/src/loop/validation.ts
--- a/src/loop/validation.ts
+++ b/src/loop/validation.ts
@@ ... @@
+export async function runLintValidation(
+  cwd: string,
+  command: ValidationCommand
+): Promise<ValidationResult> {
+  try {
+    const result = await execa(command.command, command.args, {
+      cwd,
+      timeout: 60000, // 1 minute timeout (lint is fast)
+      reject: false,
+    });
+
+    return {
+      success: result.exitCode === 0,
+      command: `${command.command} ${command.args.join(' ')}`,
+      output: result.stdout,
+      ...(result.exitCode !== 0 && { error: result.stderr || result.stdout }),
+    };
+  } catch (error) {
+    return {
+      success: false,
+      command: `${command.command} ${command.args.join(' ')}`,
+      output: '',
+      error: error instanceof Error ? error.message : 'Unknown error',
+    };
+  }
+}
+
+/**
+ * Detect build-only commands for always-on build validation.
+ * Unlike detectValidationCommands(), this:
+ * 1. Only returns build/typecheck commands (not test/lint)
+ * 2. Has TypeScript fallback (npx tsc --noEmit) when no build script exists
+ * 3. Is designed to be called per-iteration (re-detects if package.json appears mid-loop)
+ */
+export function detectBuildCommands(cwd: string): ValidationCommand[] {
+  const commands: ValidationCommand[] = [];
+
+  // Check AGENTS.md for build command
+  const agentsPath = join(cwd, 'AGENTS.md');
+  if (existsSync(agentsPath)) {
+    const content = readFileSync(agentsPath, 'utf-8');
+
+    const buildMatch = content.match(/[-*]\s*\*?\*?build\*?\*?[:\s]+`([^`]+)`/i);
+    if (buildMatch) {
+      const parts = buildMatch[1].trim().split(/\s+/);
+      commands.push({ name: 'build', command: parts[0], args: parts.slice(1) });
+    }
+
+    const typecheckMatch = content.match(/[-*]\s*\*?\*?typecheck\*?\*?[:\s]+`([^`]+)`/i);
+    if (typecheckMatch) {
+      const parts = typecheckMatch[1].trim().split(/\s+/);
+      commands.push({ name: 'typecheck', command: parts[0], args: parts.slice(1) });
+    }
+  }
+
+  // Fallback to package.json
+  if (commands.length === 0) {
+    const packagePath = join(cwd, 'package.json');
+    if (existsSync(packagePath)) {
+      try {
+        const pkg = JSON.parse(readFileSync(packagePath, 'utf-8'));
+        const scripts = pkg.scripts || {};
+        const pm = detectPackageManager(cwd);
+
+        if (scripts.build) {
+          const cmd = getRunCommand(pm, 'build');
+          commands.push({ name: 'build', ...cmd });
+        }
+        if (scripts.typecheck) {
+          const cmd = getRunCommand(pm, 'typecheck');
+          commands.push({ name: 'typecheck', ...cmd });
+        }
+      } catch {
+        // Invalid package.json
+      }
+    }
+  }
+
+  // TypeScript fallback: if no build/typecheck script but tsconfig.json exists
+  if (commands.length === 0) {
+    const tsconfigPath = join(cwd, 'tsconfig.json');
+    if (existsSync(tsconfigPath)) {
+      commands.push({ name: 'typecheck', command: 'npx', args: ['tsc', '--noEmit'] });
+    }
+  }
+
+  return commands;
+}
+
+/**
+ * Run a single build validation command with a shorter timeout.
+ */
+export async function runBuildValidation(
+  cwd: string,
+  command: ValidationCommand
+): Promise<ValidationResult> {
+  try {
+    const result = await execa(command.command, command.args, {
+      cwd,
+      timeout: 120000, // 2 minute timeout (vs 5 min for full validation)
+      reject: false,
+    });
+
+    return {
+      success: result.exitCode === 0,
+      command: `${command.command} ${command.args.join(' ')}`,
+      output: result.stdout,
+      ...(result.exitCode !== 0 && { error: result.stderr || result.stdout }),
+    };
+  } catch (error) {
+    return {
+      success: false,
+      command: `${command.command} ${command.args.join(' ')}`,
+      output: '',
+      error: error instanceof Error ? error.message : 'Unknown error',
+    };
+  }
+}
+
 /**
  * Run a single validation command
  */
@@ -129,7 +298,9 @@
 }

 /**
- * Run all validation commands
+ * Run all validation commands.
+ * Runs every command regardless of individual failures — this gives the agent
+ * a complete picture of all issues, enabling multi-fix iterations.
  */
 export async function runAllValidations(
   cwd: string,
@@ -140,11 +311,6 @@
   for (const command of commands) {
     const result = await runValidation(cwd, command);
     results.push(result);
-
-    // Stop on first failure
-    if (!result.success) {
-      break;
-    }
   }

   return results;
diff --git a/src/mcp/core/run.ts b/src/mcp/core/run.ts
index bda2585..852886f 100644
--- a/src/mcp/core/run.ts
+++ b/src/mcp/core/run.ts
@@ -122,7 +122,7 @@ Focus on one task at a time. 
After completing a task, update IMPLEMENTATION_PLAN : `Ralph: ${finalTask.slice(0, 50)}`; // Calculate smart iterations based on tasks - const { iterations: smartIterations } = calculateOptimalIterations(cwd); + const { iterations: smartIterations } = calculateOptimalIterations(cwd, finalTask); const loopOptions: LoopOptions = { task: finalTask, diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 6697dc3..84c00b2 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -1,29 +1,67 @@ import chalk from 'chalk'; -import { execa } from 'execa'; import ora from 'ora'; -import { findSkill } from '../loop/skills.js'; +import { POPULAR_SKILLS } from '../commands/skill.js'; +import { type ClaudeSkill, detectClaudeSkills, WEB_TASK_KEYWORDS } from '../loop/skills.js'; export interface SkillCandidate { fullName: string; // owner/repo@skill repo: string; skill: string; + installs: number; score: number; } -const MAX_SKILLS_TO_INSTALL = 2; +const MAX_SKILLS_TO_INSTALL = 3; + +/** + * Normalize a skill identifier for comparison. + * Handles mismatches between YAML frontmatter names (may use spaces/caps) + * and skills.sh API skillIds (always hyphenated lowercase). + */ +function normalizeSkillId(name: string): string { + return name.toLowerCase().replace(/[\s_]+/g, '-'); +} +const SKILLS_API_URL = 'https://skills.sh/api/search'; const SKILLS_CLI = 'skills'; +/** Shape of a single skill from the skills.sh search API */ +interface SkillsApiSkill { + id: string; + skillId: string; + name: string; + installs: number; + source: string; +} + +/** Keywords that indicate a skill is NOT relevant for standard web projects */ +const WEB_NEGATIVE_KEYWORDS = [ + 'react-native', + 'mobile', + 'ios', + 'android', + 'flutter', + 'swift', + 'kotlin', +]; + function buildSkillQueries(task: string): string[] { const queries = new Set(); const text = task.toLowerCase(); + // Framework-specific skills if (text.includes('astro')) queries.add('astro'); - if (text.includes('react')) queries.add('react'); + if (text.includes('react') || text.includes('jsx')) { + queries.add('react best practices'); + queries.add('react composition patterns'); + } if (text.includes('next')) queries.add('nextjs'); + if (text.includes('vue')) queries.add('vue'); + if (text.includes('svelte')) queries.add('svelte'); if (text.includes('tailwind')) queries.add('tailwind'); if (text.includes('seo')) queries.add('seo'); if (text.includes('accessibility') || text.includes('a11y')) queries.add('accessibility'); + // Landing/marketing pages get SEO + design skills automatically if ( text.includes('landing') || text.includes('website') || @@ -32,13 +70,40 @@ function buildSkillQueries(task: string): string[] { text.includes('marketing') ) { queries.add('frontend design'); - queries.add('web design'); + queries.add('seo'); } if (text.includes('design') || text.includes('ui') || text.includes('ux')) { queries.add('ui design'); } + // CSS/styling tasks get design skills + const cssKeywords = [ + 'css', + 'style', + 'styling', + 'padding', + 'margin', + 'spacing', + 'color', + 'colour', + 'background', + 'theme', + 'font', + 'typography', + 'border', + 'shadow', + 'layout', + 'responsive', + 'animation', + 'grid', + 'flex', + ]; + if (cssKeywords.some((kw) => text.includes(kw))) { + queries.add('frontend design'); + queries.add('ui design'); + } + if (queries.size === 0) { queries.add('web design'); } @@ -46,20 +111,47 @@ function buildSkillQueries(task: string): string[] { return Array.from(queries); } -function 
parseSkillLine(line: string): SkillCandidate | null { - const match = line.match(/([a-z0-9_.-]+\/[a-z0-9_.-]+@[a-z0-9_.-]+)/i); - if (!match) return null; +/** + * Check if a skill is relevant to the given task. + * Reuses the same logic as the executor's shouldAutoApplySkill. + */ +function isSkillRelevantToTask(skill: ClaudeSkill, task: string): boolean { + const name = skill.name.toLowerCase(); + const desc = (skill.description || '').toLowerCase(); + const text = `${name} ${desc}`; + const taskLower = task.toLowerCase(); - const fullName = match[1]; - const [repo, skill] = fullName.split('@'); - if (!repo || !skill) return null; + const taskIsWeb = WEB_TASK_KEYWORDS.some((kw) => taskLower.includes(kw)); - return { - fullName, - repo, - skill, - score: 0, - }; + const isDesignSkill = + text.includes('design') || + text.includes('ui') || + text.includes('ux') || + text.includes('frontend'); + + if (taskIsWeb && isDesignSkill) return true; + if (taskLower.includes('astro') && text.includes('astro')) return true; + if (taskLower.includes('tailwind') && text.includes('tailwind')) return true; + if (taskLower.includes('seo') && text.includes('seo')) return true; + + return false; +} + +/** + * Check if a candidate skill is irrelevant to the task (negative filtering). + * E.g., react-native-design for a web landing page. + */ +function isCandidateIrrelevant(candidate: SkillCandidate, task: string): boolean { + const taskLower = task.toLowerCase(); + const skillText = `${candidate.fullName} ${candidate.skill}`.toLowerCase(); + + // If the task explicitly mentions a platform, don't filter it out + for (const keyword of WEB_NEGATIVE_KEYWORDS) { + if (taskLower.includes(keyword)) return false; + } + + // For standard web tasks, filter out mobile/native skills + return WEB_NEGATIVE_KEYWORDS.some((keyword) => skillText.includes(keyword)); } function scoreCandidate(candidate: SkillCandidate, task: string): number { @@ -73,14 +165,22 @@ function scoreCandidate(candidate: SkillCandidate, task: string): number { boost('frontend', 3); boost('design', 3); + boost('best-practices', 2); + boost('composition', 2); + boost('guidelines', 2); boost('ui', 2); boost('ux', 2); boost('landing', 2); boost('astro', taskLower.includes('astro') ? 3 : 1); - boost('react', taskLower.includes('react') ? 2 : 0); - boost('next', taskLower.includes('next') ? 2 : 0); - boost('tailwind', taskLower.includes('tailwind') ? 2 : 0); - boost('seo', taskLower.includes('seo') ? 2 : 0); + boost('react', taskLower.includes('react') ? 3 : 0); + boost('next', taskLower.includes('next') ? 3 : 0); + boost('tailwind', taskLower.includes('tailwind') ? 3 : 0); + boost('seo', taskLower.includes('seo') ? 3 : 2); // SEO is always useful for web projects + + // Boost based on install count (popularity as quality signal) + if (candidate.installs > 10000) score += 5; + else if (candidate.installs > 1000) score += 3; + else if (candidate.installs > 100) score += 1; return score; } @@ -93,29 +193,62 @@ function rankCandidates(candidates: SkillCandidate[], task: string): SkillCandid return candidates.sort((a, b) => b.score - a.score); } +/** + * Search skills.sh HTTP API for skills matching a query. + * Returns structured results with real repo names and install counts. 
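 * @example
 * // A response like the following (values invented; the shape matches
 * // SkillsApiSkill below):
 * //   { skills: [{ id: '1', skillId: 'frontend-design', name: 'Frontend Design',
 * //                installs: 4200, source: 'acme/skills' }] }
 * // maps to:
 * //   [{ fullName: 'acme/skills@frontend-design', repo: 'acme/skills',
 * //      skill: 'frontend-design', installs: 4200, score: 0 }]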
+ */
 async function findSkillsByQuery(query: string): Promise<SkillCandidate[]> {
   try {
-    const result = await execa('npx', [SKILLS_CLI, 'find', query], {
-      stdio: 'pipe',
-    });
-
-    const lines = result.stdout.split('\n').map((line) => line.trim());
-    const candidates: SkillCandidate[] = [];
+    const url = `${SKILLS_API_URL}?q=${encodeURIComponent(query)}`;
+    const resp = await fetch(url, { signal: AbortSignal.timeout(5000) });
+    if (!resp.ok) return [];
+
+    const data = (await resp.json()) as { skills?: SkillsApiSkill[] };
+    return (data.skills || []).map((s) => ({
+      fullName: `${s.source}@${s.skillId}`,
+      repo: s.source,
+      skill: s.skillId,
+      installs: s.installs ?? 0,
+      score: 0,
+    }));
+  } catch {
+    return []; // Timeout or network error — caller falls back to POPULAR_SKILLS
+  }
+}

-    for (const line of lines) {
-      const candidate = parseSkillLine(line);
-      if (candidate) {
-        candidates.push(candidate);
+/**
+ * Fallback: match task keywords against the curated POPULAR_SKILLS registry
+ * when the skills.sh API is unreachable.
+ */
+function fallbackFromPopularSkills(task: string): SkillCandidate[] {
+  const taskLower = task.toLowerCase();
+  const candidates: SkillCandidate[] = [];
+
+  for (const entry of POPULAR_SKILLS) {
+    const entryText = `${entry.name} ${entry.description} ${entry.category}`.toLowerCase();
+    const matches =
+      entry.skills.some((s) => taskLower.includes(s.split('-')[0])) ||
+      entryText.includes('frontend') ||
+      entryText.includes('design');
+
+    if (matches) {
+      for (const skill of entry.skills) {
+        candidates.push({
+          fullName: `${entry.name}@${skill}`,
+          repo: entry.name,
+          skill,
+          installs: 0,
+          score: 0,
+        });
       }
     }
-
-    return candidates;
-  } catch {
-    return [];
   }
+
+  return candidates;
 }

 async function installSkill(candidate: SkillCandidate, globalInstall: boolean): Promise<boolean> {
+  const { execa } = await import('execa');
   const args = [SKILLS_CLI, 'add', candidate.fullName, '-y'];
   if (globalInstall) args.push('-g');

@@ -129,13 +262,33 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Promise<string[]> {
   if (!task.trim()) return [];

-  const autoInstallEnabled = process.env.RALPH_ENABLE_SKILL_AUTO_INSTALL === '1';
-  if (!autoInstallEnabled || process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return [];
+  // Explicit disable is the only way to turn this off
+  if (process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return [];

+  // Detect what's already installed
+  const installedSkills = detectClaudeSkills(cwd);
+  const relevantInstalled = installedSkills.filter((s) => isSkillRelevantToTask(s, task));
+
+  // Show installed skills to the user
+  if (relevantInstalled.length > 0) {
+    const names = relevantInstalled.map((s) => s.name);
+    console.log(chalk.cyan(`Using installed skills: ${names.join(', ')}`));
+  }
+
+  // Skip API search when enough relevant skills are already installed
+  const SUFFICIENT_RELEVANT_SKILLS = 3;
+  if (relevantInstalled.length >= SUFFICIENT_RELEVANT_SKILLS) {
+    console.log(
+      chalk.green(`Sufficient skills installed (${relevantInstalled.length}), skipping search.`)
+    );
+    return relevantInstalled.map((s) => s.name);
+  }
+
+  // Search for complementary skills if we don't have enough relevant ones
   const queries = buildSkillQueries(task);
-  if (queries.length === 0) return [];
+  if (queries.length === 0) return relevantInstalled.map((s) => s.name);

-  const spinner = ora('Searching skills.sh for relevant skills...').start();
+  const spinner = ora('Checking for complementary skills...').start();

   const allCandidates = new Map();
   for (const query of queries) {
@@ -147,25 +300,39 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Promise<string[]> {
   }

+  // Fallback to curated registry if API returned nothing
+  if (allCandidates.size === 0) {
+    const fallback = fallbackFromPopularSkills(task);
+    for (const candidate of fallback) {
+      if (!allCandidates.has(candidate.fullName)) {
+        allCandidates.set(candidate.fullName, candidate);
+      }
+    }
+  }
+
   if (allCandidates.size === 0) {
-    spinner.warn('No skills found from skills.sh');
-    return [];
+    spinner.stop();
+    return relevantInstalled.map((s) => s.name);
   }

   const ranked = rankCandidates(Array.from(allCandidates.values()), task);
   const toInstall = ranked
-    .filter((candidate) => !findSkill(cwd, candidate.skill))
+    .filter((candidate) => !isCandidateIrrelevant(candidate, task))
+    .filter(
+      (candidate) =>
+        !installedSkills.some((s) => normalizeSkillId(s.name) === normalizeSkillId(candidate.skill))
+    )
     .slice(0, MAX_SKILLS_TO_INSTALL);

   if (toInstall.length === 0) {
-    spinner.succeed('Relevant skills already installed');
-    return [];
+    spinner.succeed('All relevant skills already installed');
+    return relevantInstalled.map((s) => s.name);
   }

   spinner.stop();
-  console.log(chalk.cyan('Installing recommended skills from skills.sh...'));
+  console.log(chalk.cyan('Installing complementary skills from skills.sh...'));

-  const installed: string[] = [];
+  const installed: string[] = relevantInstalled.map((s) => s.name);
   for (const candidate of toInstall) {
     console.log(chalk.dim(`  • ${candidate.fullName}`));
     const ok = await installSkill(candidate, true);
@@ -174,10 +341,9 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Promise<string[]> {
     }
   }

-  if (installed.length > 0) {
-    console.log(chalk.green(`Installed skills: ${installed.join(', ')}`));
-  } else {
-    console.log(chalk.yellow('No skills were installed.'));
+  const newlyInstalled = installed.slice(relevantInstalled.length);
+  if (newlyInstalled.length > 0) {
+    console.log(chalk.green(`Installed: ${newlyInstalled.join(', ')}`));
   }

   return installed;
diff --git a/src/utils/__tests__/package-manager.test.ts b/src/utils/__tests__/package-manager.test.ts
new file mode 100644
index 0000000..0ceff38
--- /dev/null
+++ b/src/utils/__tests__/package-manager.test.ts
@@ -0,0 +1,94 @@
+import { existsSync, readFileSync } from 'node:fs';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { detectPackageManager, formatRunCommand, getRunCommand } from '../package-manager.js';
+
+vi.mock('node:fs', () => ({
+  existsSync: vi.fn(),
+  readFileSync: vi.fn(),
+}));
+
+const mockExistsSync = vi.mocked(existsSync);
+const mockReadFileSync = vi.mocked(readFileSync);
+
+describe('detectPackageManager', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should return pnpm when pnpm-lock.yaml exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('pnpm-lock.yaml'));
+    expect(detectPackageManager('/test')).toBe('pnpm');
+  });
+
+  it('should return yarn when yarn.lock exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('yarn.lock'));
+    expect(detectPackageManager('/test')).toBe('yarn');
+  });
+
+  it('should return bun when bun.lockb exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lockb'));
+    expect(detectPackageManager('/test')).toBe('bun');
+  });
+
+  it('should return bun when bun.lock exists', () => {
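// bun.lock is the newer text-based lockfile (Bun 1.2+); bun.lockb is the legacy binary format, so both are tested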
mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lock')); + expect(detectPackageManager('/test')).toBe('bun'); + }); + + it('should read packageManager field from package.json', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('pnpm'); + }); + + it('should prefer lockfile over packageManager field', () => { + mockExistsSync.mockImplementation( + (p: any) => p.toString().includes('yarn.lock') || p.toString().includes('package.json') + ); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('yarn'); + }); + + it('should default to npm when no indicators found', () => { + mockExistsSync.mockReturnValue(false); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should default to npm for unrecognized packageManager', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'unknown@1.0.0' })); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should handle invalid package.json gracefully', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue('not valid json'); + expect(detectPackageManager('/test')).toBe('npm'); + }); +}); + +describe('getRunCommand', () => { + it('should return shorthand for test script', () => { + expect(getRunCommand('pnpm', 'test')).toEqual({ command: 'pnpm', args: ['test'] }); + expect(getRunCommand('npm', 'test')).toEqual({ command: 'npm', args: ['test'] }); + expect(getRunCommand('bun', 'test')).toEqual({ command: 'bun', args: ['test'] }); + }); + + it('should use run for non-test scripts', () => { + expect(getRunCommand('pnpm', 'build')).toEqual({ command: 'pnpm', args: ['run', 'build'] }); + expect(getRunCommand('npm', 'lint')).toEqual({ command: 'npm', args: ['run', 'lint'] }); + expect(getRunCommand('bun', 'dev')).toEqual({ command: 'bun', args: ['run', 'dev'] }); + }); +}); + +describe('formatRunCommand', () => { + it('should format test commands', () => { + expect(formatRunCommand('pnpm', 'test')).toBe('pnpm test'); + expect(formatRunCommand('npm', 'test')).toBe('npm test'); + }); + + it('should format run commands', () => { + expect(formatRunCommand('pnpm', 'build')).toBe('pnpm run build'); + expect(formatRunCommand('yarn', 'lint')).toBe('yarn run lint'); + }); +}); diff --git a/src/utils/package-manager.ts b/src/utils/package-manager.ts new file mode 100644 index 0000000..354b2bb --- /dev/null +++ b/src/utils/package-manager.ts @@ -0,0 +1,61 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +export type PackageManager = 'npm' | 'pnpm' | 'yarn' | 'bun'; + +/** + * Detect the package manager used in a project directory. + * + * Detection priority: + * 1. Lock file presence (most reliable — reflects actual usage) + * 2. packageManager field in package.json (explicit declaration) + * 3. 
Default: npm + */ +export function detectPackageManager(cwd: string): PackageManager { + // Check lock files first (most reliable indicator of actual usage) + if (existsSync(join(cwd, 'pnpm-lock.yaml'))) return 'pnpm'; + if (existsSync(join(cwd, 'yarn.lock'))) return 'yarn'; + if (existsSync(join(cwd, 'bun.lockb')) || existsSync(join(cwd, 'bun.lock'))) return 'bun'; + + // Check package.json packageManager field + const packageJsonPath = join(cwd, 'package.json'); + if (existsSync(packageJsonPath)) { + try { + const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); + if (pkg.packageManager) { + const name = pkg.packageManager.split('@')[0]; + if (['pnpm', 'yarn', 'bun'].includes(name)) { + return name as PackageManager; + } + } + } catch { + // Invalid package.json — fall through to default + } + } + + return 'npm'; +} + +/** + * Get the run command for a package manager script. + * For 'test', uses the shorthand (e.g., `pnpm test`). + * For other scripts, uses `run` (e.g., `pnpm run build`). + */ +export function getRunCommand( + pm: PackageManager, + script: string +): { command: string; args: string[] } { + if (script === 'test') { + return { command: pm, args: ['test'] }; + } + return { command: pm, args: ['run', script] }; +} + +/** + * Format a run command as a display string. + * e.g., "pnpm run build" or "bun test" + */ +export function formatRunCommand(pm: PackageManager, script: string): string { + const { command, args } = getRunCommand(pm, script); + return `${command} ${args.join(' ')}`; +} diff --git a/src/wizard/index.ts b/src/wizard/index.ts index 1ba4840..44e9f22 100644 --- a/src/wizard/index.ts +++ b/src/wizard/index.ts @@ -64,6 +64,7 @@ function normalizeTechStack(stack: WizardAnswers['techStack']): WizardAnswers['t backend: normalizeTechStackValue(stack.backend), database: normalizeTechStackValue(stack.database), styling: normalizeTechStackValue(stack.styling), + uiLibrary: normalizeTechStackValue(stack.uiLibrary), language: normalizeTechStackValue(stack.language), }; } diff --git a/src/wizard/llm.ts b/src/wizard/llm.ts index b6de7ad..1b22f72 100644 --- a/src/wizard/llm.ts +++ b/src/wizard/llm.ts @@ -106,6 +106,7 @@ Return ONLY valid JSON (no markdown, no explanation) in this exact format: "backend": "nodejs|express|fastify|hono|python|django|flask|fastapi|go|gin|rust|null", "database": "sqlite|postgres|mysql|mongodb|redis|supabase|firebase|prisma|drizzle|null", "styling": "tailwind|css|scss|styled-components|null", + "uiLibrary": "shadcn|shadcn-vue|shadcn-svelte|mui|chakra|null", "language": "typescript|javascript|python|go|rust" }, "coreFeatures": ["feature1", "feature2", "feature3"], @@ -124,6 +125,7 @@ Guidelines: - Language: TypeScript, Python, Go, Rust, etc. - NEVER substitute a user-specified technology with a different one - Only suggest defaults when the user doesn't specify (e.g., TypeScript + React for unspecified web apps) +- For web projects, default to Tailwind CSS + shadcn/ui (or framework variant) + motion-primitives unless the user explicitly specifies different styling/UI libraries. Use shadcn for React/Next.js, shadcn-vue for Vue, shadcn-svelte for Svelte. - coreFeatures are essential features implied by the idea - suggestedFeatures are nice-to-haves that would enhance the project - estimatedComplexity is based on scope (prototype=hours, mvp=day, full=days/weeks)`; @@ -456,6 +458,19 @@ function getTemplateSuggestions(idea: string): RefinedIdea { detectedBackend || (detectedFrontend === 'astro' ? 
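/* Assumption behind this branch: Astro projects are treated as static-first, so they get no backend/database defaults */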
undefined : 'nodejs'); suggestedStack.database = detectedDatabase || (detectedFrontend === 'astro' ? undefined : 'sqlite'); + + // Default UI stack: Tailwind + shadcn (framework-appropriate variant) + motion-primitives + if (!detectedStyling) { + suggestedStack.styling = 'tailwind'; + const frontend = suggestedStack.frontend; + if (frontend === 'vue') { + suggestedStack.uiLibrary = 'shadcn-vue'; + } else if (frontend === 'svelte') { + suggestedStack.uiLibrary = 'shadcn-svelte'; + } else if (frontend && frontend !== 'vanilla' && frontend !== 'astro') { + suggestedStack.uiLibrary = 'shadcn'; + } + } } else if (projectType === 'api') { suggestedStack.backend = detectedBackend || 'nodejs'; suggestedStack.database = detectedDatabase || 'postgres'; diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts index 66283b4..a67705d 100644 --- a/src/wizard/spec-generator.ts +++ b/src/wizard/spec-generator.ts @@ -1,3 +1,4 @@ +import { detectPackageManager, formatRunCommand } from '../utils/package-manager.js'; import type { TechStack, WizardAnswers } from './types.js'; import { formatComplexity, formatProjectType } from './ui.js'; @@ -39,10 +40,35 @@ export function generateSpec(answers: WizardAnswers): string { if (answers.techStack.styling) { sections.push(`- **Styling:** ${formatTech(answers.techStack.styling)}`); } + if (answers.techStack.uiLibrary) { + sections.push(`- **UI Library:** ${formatTech(answers.techStack.uiLibrary)}`); + } if (answers.techStack.language) { sections.push(`- **Language:** ${formatTech(answers.techStack.language)}`); } sections.push(''); + + // Technical setup notes (prevents common pitfalls like CSS cascade conflicts) + if (answers.techStack.styling === 'tailwind') { + sections.push('### Setup Notes'); + sections.push(''); + sections.push( + '- Use Tailwind CSS v4 with `@import "tailwindcss"` — do NOT use v3 `@tailwind` directives' + ); + sections.push( + '- Do NOT add manual CSS resets — Tailwind v4 preflight handles `box-sizing`, margin/padding resets' + ); + sections.push( + '- Custom CSS must be wrapped in `@layer base { }` or `@layer components { }` to avoid overriding Tailwind utilities' + ); + if (answers.techStack.uiLibrary) { + sections.push( + `- Use ${formatTech(answers.techStack.uiLibrary)} components — install and add components as needed` + ); + sections.push('- Use motion-primitives for page transitions and micro-interactions'); + } + sections.push(''); + } } // Features @@ -149,6 +175,45 @@ export function generateAgentsMd(answers: WizardAnswers): string { sections.push('- Use Prisma for database access'); } + // Styling-specific instructions + if (answers.techStack.styling === 'tailwind') { + sections.push(''); + sections.push('### Tailwind CSS v4 Setup (CRITICAL)'); + sections.push(''); + sections.push('- Install: `npm install tailwindcss @tailwindcss/postcss postcss`'); + sections.push("- postcss.config.js: `plugins: { '@tailwindcss/postcss': {} }`"); + sections.push( + '- CSS entry: `@import "tailwindcss";` (NOT `@tailwind base/components/utilities`)' + ); + sections.push('- Do NOT create tailwind.config.js (v4 uses CSS-based config)'); + sections.push( + '- Do NOT add a manual CSS reset (`* { margin: 0; padding: 0; }`) — Tailwind v4 preflight handles this' + ); + sections.push( + '- If you need custom base styles, wrap them in `@layer base { }` — unlayered CSS overrides ALL Tailwind utilities' + ); + } + + // UI Library setup + const uiLib = answers.techStack.uiLibrary; + if (uiLib === 'shadcn' || uiLib === 'shadcn-vue' || uiLib === 
'shadcn-svelte') { + sections.push(''); + sections.push('### UI Components'); + sections.push(''); + if (uiLib === 'shadcn') { + sections.push('- Use shadcn/ui for UI components: `npx shadcn@latest init`'); + sections.push('- Add components as needed: `npx shadcn@latest add button card dialog`'); + } else if (uiLib === 'shadcn-vue') { + sections.push('- Use shadcn-vue for UI components'); + } else if (uiLib === 'shadcn-svelte') { + sections.push('- Use shadcn-svelte for UI components'); + } + sections.push('- Use motion-primitives for animations'); + sections.push( + '- Follow the component patterns from shadcn — composable, accessible, customizable' + ); + } + sections.push(''); sections.push('## Validation Commands'); sections.push(''); @@ -160,9 +225,11 @@ export function generateAgentsMd(answers: WizardAnswers): string { answers.techStack.backend === 'nodejs'; if (hasNodeStack) { - sections.push('- **lint**: `npm run lint`'); - sections.push('- **build**: `npm run build`'); - sections.push('- **test**: `npm test`'); + // Detect PM from working directory if available, default to npm for greenfield projects + const pm = answers.workingDirectory ? detectPackageManager(answers.workingDirectory) : 'npm'; + sections.push(`- **lint**: \`${formatRunCommand(pm, 'lint')}\``); + sections.push(`- **build**: \`${formatRunCommand(pm, 'build')}\``); + sections.push(`- **test**: \`${formatRunCommand(pm, 'test')}\``); } else if (answers.techStack.backend === 'python') { sections.push('- **lint**: `ruff check .`'); sections.push('- **test**: `pytest`'); @@ -190,7 +257,14 @@ export function generateAgentsMd(answers: WizardAnswers): string { * Check if tech stack has any values */ function hasTechStack(stack: TechStack): boolean { - return !!(stack.frontend || stack.backend || stack.database || stack.styling || stack.language); + return !!( + stack.frontend || + stack.backend || + stack.database || + stack.styling || + stack.uiLibrary || + stack.language + ); } /** @@ -216,6 +290,12 @@ function formatTech(tech: string): string { css: 'CSS', scss: 'SCSS', 'styled-components': 'styled-components', + shadcn: 'shadcn/ui', + 'shadcn-vue': 'shadcn-vue', + 'shadcn-svelte': 'shadcn-svelte', + mui: 'Material UI', + chakra: 'Chakra UI', + 'motion-primitives': 'motion-primitives', typescript: 'TypeScript', javascript: 'JavaScript', }; diff --git a/src/wizard/types.ts b/src/wizard/types.ts index 56395ab..dd5a8f0 100644 --- a/src/wizard/types.ts +++ b/src/wizard/types.ts @@ -8,6 +8,7 @@ export interface TechStack { backend?: string; database?: string; styling?: string; + uiLibrary?: string; language?: string; } diff --git a/src/wizard/ui.ts b/src/wizard/ui.ts index af28586..d39f733 100644 --- a/src/wizard/ui.ts +++ b/src/wizard/ui.ts @@ -64,6 +64,7 @@ export function showRefinedSummary( backend?: string; database?: string; styling?: string; + uiLibrary?: string; language?: string; }, features: string[], @@ -89,6 +90,11 @@ export function showRefinedSummary( css: 'CSS', scss: 'SCSS', 'styled-components': 'styled-components', + shadcn: 'shadcn/ui', + 'shadcn-vue': 'shadcn-vue', + 'shadcn-svelte': 'shadcn-svelte', + mui: 'Material UI', + chakra: 'Chakra UI', typescript: 'TypeScript', javascript: 'JavaScript', }; @@ -103,7 +109,14 @@ export function showRefinedSummary( console.log(` ${chalk.white('Type:')} ${projectType}`); console.log(); - if (stack.frontend || stack.backend || stack.database || stack.styling || stack.language) { + if ( + stack.frontend || + stack.backend || + stack.database || + stack.styling || + 
stack.uiLibrary || + stack.language + ) { console.log(` ${chalk.white('Tech Stack:')}`); if (stack.frontend) console.log(` ${chalk.dim('Frontend:')} ${formatTechLabel(stack.frontend)}`); @@ -113,6 +126,8 @@ export function showRefinedSummary( console.log(` ${chalk.dim('Database:')} ${formatTechLabel(stack.database)}`); if (stack.styling) console.log(` ${chalk.dim('Styling:')} ${formatTechLabel(stack.styling)}`); + if (stack.uiLibrary) + console.log(` ${chalk.dim('UI Library:')} ${formatTechLabel(stack.uiLibrary)}`); if (stack.language) console.log(` ${chalk.dim('Language:')} ${formatTechLabel(stack.language)}`); console.log();