From 7aea4a25c17dc10de17b3b2f75db5cdcab0891a9 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 12 Feb 2026 14:10:31 +0000 Subject: [PATCH 01/46] fix(loop): improve validation for greenfield builds - Reset circuit breaker when tasks advance (prevents false positives during multi-task greenfield builds where early tasks can't pass tests) - Add package manager detection: auto-detect pnpm/yarn/bun from lockfiles and packageManager field instead of hardcoding npm - Add validation warm-up: skip validation until enough tasks are done for greenfield builds (auto-detected, configurable via --validation-warmup) Co-Authored-By: Claude Opus 4.6 --- src/cli.ts | 4 + src/commands/init.ts | 16 +++- src/commands/run.ts | 30 ++++--- src/loop/__tests__/validation.test.ts | 2 +- src/loop/executor.ts | 9 +- src/loop/validation.ts | 14 ++- src/utils/__tests__/package-manager.test.ts | 94 +++++++++++++++++++++ src/utils/package-manager.ts | 61 +++++++++++++ src/wizard/spec-generator.ts | 9 +- 9 files changed, 215 insertions(+), 24 deletions(-) create mode 100644 src/utils/__tests__/package-manager.test.ts create mode 100644 src/utils/package-manager.ts diff --git a/src/cli.ts b/src/cli.ts index f5bac3a..8696969 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -82,6 +82,10 @@ program .option('--no-track-cost', 'Disable cost tracking') .option('--circuit-breaker-failures ', 'Max consecutive failures before stopping (default: 3)') .option('--circuit-breaker-errors ', 'Max same error occurrences before stopping (default: 5)') + .option( + '--validation-warmup ', + 'Skip validation until N tasks are completed (auto-detected for greenfield builds)' + ) .option( '--context-budget ', 'Max input tokens per iteration for smart context trimming (0 = unlimited)' diff --git a/src/commands/init.ts b/src/commands/init.ts index ec3754c..4e202dd 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -6,6 +6,11 @@ import ora from 'ora'; import YAML from 'yaml'; import { initGitRepo, isGitRepo 
} from '../automation/git.js'; import { type Agent, detectAvailableAgents, printAgentStatus } from '../loop/agents.js'; +import { + detectPackageManager, + formatRunCommand, + type PackageManager, +} from '../utils/package-manager.js'; interface InitOptions { name?: string; @@ -18,6 +23,7 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'unknown'; export interface ProjectInfo { type: ProjectType; name: string; + packageManager?: PackageManager; testCmd?: string; buildCmd?: string; lintCmd?: string; @@ -29,12 +35,14 @@ export function detectProject(cwd: string): ProjectInfo { try { const pkg = JSON.parse(readFileSync(join(cwd, 'package.json'), 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); return { type: 'nodejs', name: pkg.name || 'project', - testCmd: scripts.test ? 'npm test' : undefined, - buildCmd: scripts.build ? 'npm run build' : undefined, - lintCmd: scripts.lint ? 'npm run lint' : undefined, + packageManager: pm, + testCmd: scripts.test ? formatRunCommand(pm, 'test') : undefined, + buildCmd: scripts.build ? formatRunCommand(pm, 'build') : undefined, + lintCmd: scripts.lint ? formatRunCommand(pm, 'lint') : undefined, }; } catch { return { type: 'nodejs', name: 'project' }; @@ -150,7 +158,7 @@ ${validationCmds.length > 0 ? validationCmds.join('\n') : '# Add your test/build ## Build Instructions -${project.type === 'nodejs' ? '1. Run `npm install` to install dependencies\n2. Run `npm run build` to build (if applicable)\n3. Run `npm test` to verify' : ''} +${project.type === 'nodejs' ? `1. Run \`${project.packageManager || 'npm'} install\` to install dependencies\n2. Run \`${project.buildCmd || `${project.packageManager || 'npm'} run build`}\` to build (if applicable)\n3. Run \`${project.testCmd || `${project.packageManager || 'npm'} test`}\` to verify` : ''} ${project.type === 'python' ? '1. Create virtual environment: `python -m venv venv`\n2. Install dependencies: `pip install -e .`\n3. 
Run tests: `pytest`' : ''} ${project.type === 'rust' ? '1. Run `cargo build` to compile\n2. Run `cargo test` to verify' : ''} ${project.type === 'go' ? '1. Run `go mod tidy` to sync dependencies\n2. Run `go build ./...` to compile\n3. Run `go test ./...` to verify' : ''} diff --git a/src/commands/run.ts b/src/commands/run.ts index 58ed02e..384eb0f 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -20,6 +20,7 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; import { getSourceDefaults } from '../sources/config.js'; import { fetchFromSource } from '../sources/index.js'; +import { detectPackageManager, formatRunCommand, getRunCommand } from '../utils/package-manager.js'; /** Default fallback repo for GitHub issues when no project is specified */ const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas'; @@ -42,19 +43,14 @@ function detectRunCommand( try { const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); // Priority order for dev commands - if (scripts.dev) { - return { command: 'npm', args: ['run', 'dev'], description: 'npm run dev' }; - } - if (scripts.start) { - return { command: 'npm', args: ['run', 'start'], description: 'npm run start' }; - } - if (scripts.serve) { - return { command: 'npm', args: ['run', 'serve'], description: 'npm run serve' }; - } - if (scripts.preview) { - return { command: 'npm', args: ['run', 'preview'], description: 'npm run preview' }; + for (const script of ['dev', 'start', 'serve', 'preview']) { + if (scripts[script]) { + const cmd = getRunCommand(pm, script); + return { ...cmd, description: formatRunCommand(pm, script) }; + } } } catch { // Ignore parse errors @@ -223,6 +219,7 @@ export interface RunCommandOptions { circuitBreakerFailures?: number; circuitBreakerErrors?: number; contextBudget?: number; + validationWarmup?: 
number; // Figma options figmaMode?: 'spec' | 'tokens' | 'components' | 'assets' | 'content'; figmaFramework?: 'react' | 'vue' | 'svelte' | 'astro' | 'nextjs' | 'nuxt' | 'html'; @@ -573,6 +570,16 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN console.log(chalk.dim(`Max iterations: ${smartIterations} (${reason})`)); } + // Auto-detect greenfield builds: skip validation until enough tasks are done + const isGreenfield = taskCount.total > 0 && taskCount.completed === 0; + const autoWarmup = isGreenfield ? Math.max(2, Math.floor(taskCount.total * 0.5)) : 0; + const validationWarmup = options.validationWarmup ? Number(options.validationWarmup) : autoWarmup; + if (validationWarmup > 0 && options.validate) { + console.log( + chalk.dim(`Validation warm-up: skipping until ${validationWarmup} tasks completed`) + ); + } + // Apply preset values with CLI overrides const loopOptions: LoopOptions = { task: preset?.promptPrefix ? `${preset.promptPrefix}\n\n${finalTask}` : finalTask, @@ -587,6 +594,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN prIssueRef: sourceIssueRef, prLabels: options.auto ? ['AUTO'] : undefined, validate: options.validate ?? preset?.validate, + validationWarmup, sourceType: options.from?.toLowerCase(), // New options completionPromise: options.completionPromise ?? 
preset?.completionPromise, diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts index f46a90c..66baa91 100644 --- a/src/loop/__tests__/validation.test.ts +++ b/src/loop/__tests__/validation.test.ts @@ -89,7 +89,7 @@ describe('validation', () => { expect(commands.find((c) => c.name === 'test')).toEqual({ name: 'test', command: 'npm', - args: ['run', 'test'], + args: ['test'], }); }); diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..102f69d 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -190,6 +190,7 @@ export interface LoopOptions { trackCost?: boolean; // Track token usage and cost model?: string; // Model name for cost estimation contextBudget?: number; // Max input tokens per iteration (0 = unlimited) + validationWarmup?: number; // Skip validation until N tasks completed (for greenfield builds) } export interface LoopResult { @@ -569,6 +570,9 @@ export async function runLoop(options: LoopOptions): Promise { // Check if tasks were completed since last iteration const newlyCompleted = completedTasks - previousCompletedTasks; if (newlyCompleted > 0 && i > 1) { + // Task completion is forward progress — reset circuit breaker consecutive failures + circuitBreaker.recordSuccess(); + // Get names of newly completed tasks (strip markdown) const maxNameWidth = Math.max(30, getTerminalWidth() - 30); const completedNames = taskInfo.tasks @@ -798,10 +802,13 @@ export async function runLoop(options: LoopOptions): Promise { } // Run validation (backpressure) if enabled and there are changes + // Skip validation during warm-up period (greenfield builds where early tasks can't pass tests) let _validationPassed = true; let validationResults: ValidationResult[] = []; + const warmupThreshold = options.validationWarmup ?? 
0; + const pastWarmup = completedTasks >= warmupThreshold; - if (validationCommands.length > 0 && (await hasUncommittedChanges(options.cwd))) { + if (validationCommands.length > 0 && pastWarmup && (await hasUncommittedChanges(options.cwd))) { spinner.start(chalk.yellow(`Loop ${i}: Running validation...`)); validationResults = await runAllValidations(options.cwd, validationCommands); diff --git a/src/loop/validation.ts b/src/loop/validation.ts index 0f3962c..afef26e 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -1,6 +1,7 @@ import { existsSync, readFileSync } from 'node:fs'; import { join } from 'node:path'; import { execa } from 'execa'; +import { detectPackageManager, getRunCommand } from '../utils/package-manager.js'; export interface ValidationCommand { name: string; @@ -68,18 +69,23 @@ export function detectValidationCommands(cwd: string): ValidationCommand[] { try { const pkg = JSON.parse(readFileSync(packagePath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); if (scripts.test && scripts.test !== 'echo "Error: no test specified" && exit 1') { - commands.push({ name: 'test', command: 'npm', args: ['run', 'test'] }); + const cmd = getRunCommand(pm, 'test'); + commands.push({ name: 'test', ...cmd }); } if (scripts.lint) { - commands.push({ name: 'lint', command: 'npm', args: ['run', 'lint'] }); + const cmd = getRunCommand(pm, 'lint'); + commands.push({ name: 'lint', ...cmd }); } if (scripts.build) { - commands.push({ name: 'build', command: 'npm', args: ['run', 'build'] }); + const cmd = getRunCommand(pm, 'build'); + commands.push({ name: 'build', ...cmd }); } if (scripts.typecheck) { - commands.push({ name: 'typecheck', command: 'npm', args: ['run', 'typecheck'] }); + const cmd = getRunCommand(pm, 'typecheck'); + commands.push({ name: 'typecheck', ...cmd }); } } catch { // Invalid package.json diff --git a/src/utils/__tests__/package-manager.test.ts b/src/utils/__tests__/package-manager.test.ts new 
file mode 100644 index 0000000..0ceff38 --- /dev/null +++ b/src/utils/__tests__/package-manager.test.ts @@ -0,0 +1,94 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { detectPackageManager, formatRunCommand, getRunCommand } from '../package-manager.js'; + +vi.mock('node:fs', () => ({ + existsSync: vi.fn(), + readFileSync: vi.fn(), +})); + +const mockExistsSync = vi.mocked(existsSync); +const mockReadFileSync = vi.mocked(readFileSync); + +describe('detectPackageManager', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return pnpm when pnpm-lock.yaml exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('pnpm-lock.yaml')); + expect(detectPackageManager('/test')).toBe('pnpm'); + }); + + it('should return yarn when yarn.lock exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('yarn.lock')); + expect(detectPackageManager('/test')).toBe('yarn'); + }); + + it('should return bun when bun.lockb exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lockb')); + expect(detectPackageManager('/test')).toBe('bun'); + }); + + it('should return bun when bun.lock exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lock')); + expect(detectPackageManager('/test')).toBe('bun'); + }); + + it('should read packageManager field from package.json', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('pnpm'); + }); + + it('should prefer lockfile over packageManager field', () => { + mockExistsSync.mockImplementation( + (p: any) => p.toString().includes('yarn.lock') || p.toString().includes('package.json') + ); + mockReadFileSync.mockReturnValue(JSON.stringify({ 
packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('yarn'); + }); + + it('should default to npm when no indicators found', () => { + mockExistsSync.mockReturnValue(false); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should default to npm for unrecognized packageManager', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'unknown@1.0.0' })); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should handle invalid package.json gracefully', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue('not valid json'); + expect(detectPackageManager('/test')).toBe('npm'); + }); +}); + +describe('getRunCommand', () => { + it('should return shorthand for test script', () => { + expect(getRunCommand('pnpm', 'test')).toEqual({ command: 'pnpm', args: ['test'] }); + expect(getRunCommand('npm', 'test')).toEqual({ command: 'npm', args: ['test'] }); + expect(getRunCommand('bun', 'test')).toEqual({ command: 'bun', args: ['test'] }); + }); + + it('should use run for non-test scripts', () => { + expect(getRunCommand('pnpm', 'build')).toEqual({ command: 'pnpm', args: ['run', 'build'] }); + expect(getRunCommand('npm', 'lint')).toEqual({ command: 'npm', args: ['run', 'lint'] }); + expect(getRunCommand('bun', 'dev')).toEqual({ command: 'bun', args: ['run', 'dev'] }); + }); +}); + +describe('formatRunCommand', () => { + it('should format test commands', () => { + expect(formatRunCommand('pnpm', 'test')).toBe('pnpm test'); + expect(formatRunCommand('npm', 'test')).toBe('npm test'); + }); + + it('should format run commands', () => { + expect(formatRunCommand('pnpm', 'build')).toBe('pnpm run build'); + expect(formatRunCommand('yarn', 'lint')).toBe('yarn run lint'); + }); +}); diff --git a/src/utils/package-manager.ts 
b/src/utils/package-manager.ts new file mode 100644 index 0000000..354b2bb --- /dev/null +++ b/src/utils/package-manager.ts @@ -0,0 +1,61 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +export type PackageManager = 'npm' | 'pnpm' | 'yarn' | 'bun'; + +/** + * Detect the package manager used in a project directory. + * + * Detection priority: + * 1. Lock file presence (most reliable — reflects actual usage) + * 2. packageManager field in package.json (explicit declaration) + * 3. Default: npm + */ +export function detectPackageManager(cwd: string): PackageManager { + // Check lock files first (most reliable indicator of actual usage) + if (existsSync(join(cwd, 'pnpm-lock.yaml'))) return 'pnpm'; + if (existsSync(join(cwd, 'yarn.lock'))) return 'yarn'; + if (existsSync(join(cwd, 'bun.lockb')) || existsSync(join(cwd, 'bun.lock'))) return 'bun'; + + // Check package.json packageManager field + const packageJsonPath = join(cwd, 'package.json'); + if (existsSync(packageJsonPath)) { + try { + const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); + if (pkg.packageManager) { + const name = pkg.packageManager.split('@')[0]; + if (['pnpm', 'yarn', 'bun'].includes(name)) { + return name as PackageManager; + } + } + } catch { + // Invalid package.json — fall through to default + } + } + + return 'npm'; +} + +/** + * Get the run command for a package manager script. + * For 'test', uses the shorthand (e.g., `pnpm test`). + * For other scripts, uses `run` (e.g., `pnpm run build`). + */ +export function getRunCommand( + pm: PackageManager, + script: string +): { command: string; args: string[] } { + if (script === 'test') { + return { command: pm, args: ['test'] }; + } + return { command: pm, args: ['run', script] }; +} + +/** + * Format a run command as a display string. 
+ * e.g., "pnpm run build" or "bun test" + */ +export function formatRunCommand(pm: PackageManager, script: string): string { + const { command, args } = getRunCommand(pm, script); + return `${command} ${args.join(' ')}`; +} diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts index 66283b4..bafa87e 100644 --- a/src/wizard/spec-generator.ts +++ b/src/wizard/spec-generator.ts @@ -1,3 +1,4 @@ +import { detectPackageManager, formatRunCommand } from '../utils/package-manager.js'; import type { TechStack, WizardAnswers } from './types.js'; import { formatComplexity, formatProjectType } from './ui.js'; @@ -160,9 +161,11 @@ export function generateAgentsMd(answers: WizardAnswers): string { answers.techStack.backend === 'nodejs'; if (hasNodeStack) { - sections.push('- **lint**: `npm run lint`'); - sections.push('- **build**: `npm run build`'); - sections.push('- **test**: `npm test`'); + // Detect PM from working directory if available, default to npm for greenfield projects + const pm = answers.workingDirectory ? 
detectPackageManager(answers.workingDirectory) : 'npm'; + sections.push(`- **lint**: \`${formatRunCommand(pm, 'lint')}\``); + sections.push(`- **build**: \`${formatRunCommand(pm, 'build')}\``); + sections.push(`- **test**: \`${formatRunCommand(pm, 'test')}\``); } else if (answers.techStack.backend === 'python') { sections.push('- **lint**: `ruff check .`'); sections.push('- **test**: `pytest`'); From 812cfc0cd5007aa395d0ca2fd4b48a14b34418ab Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 08:31:36 +0000 Subject: [PATCH 02/46] fix(loop): add stall detection and improve early termination - Track file changes across all iterations (not just iteration 1) - Stop loop after 2 consecutive idle iterations (no file changes) - Check IMPLEMENTATION_PLAN.md for pending tasks in all modes, not just when task string mentions the plan file - Lower default max-iterations from 10 to 7 when no plan file exists Fixes loops running all iterations for simple tasks where the agent finishes early but the loop doesn't detect completion. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/executor.ts | 40 ++++++++++++++++++++++++++++++---------- src/loop/task-counter.ts | 6 +++--- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 102f69d..108c03c 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -398,6 +398,7 @@ export async function runLoop(options: LoopOptions): Promise { let validationFailures = 0; let exitReason: LoopResult['exitReason'] = 'max_iterations'; let finalIteration = maxIterations; + let consecutiveIdleIterations = 0; // Initialize circuit breaker const circuitBreaker = new CircuitBreaker(options.circuitBreaker); @@ -692,23 +693,42 @@ export async function runLoop(options: LoopOptions): Promise { // Check for completion using enhanced detection let status = detectCompletion(result.output, completionOptions); + // Track file changes between iterations for stall detection + const hasChanges = await hasUncommittedChanges(options.cwd); + if (!hasChanges) { + consecutiveIdleIterations++; + } else { + consecutiveIdleIterations = 0; + } + // Verify completion - check if files were actually changed - if (status === 'done' && i === 1) { - // On first iteration, verify that files were actually created/modified - const hasChanges = await hasUncommittedChanges(options.cwd); - if (!hasChanges) { + if (status === 'done' && !hasChanges) { + if (i === 1) { console.log(chalk.yellow(' Agent reported done but no files changed - continuing...')); status = 'continue'; - } else { - // Wait for filesystem to settle before declaring done - await waitForFilesystemQuiescence(options.cwd, 2000); } + // On later iterations, allow done if agent genuinely finished (no more work to do) + } else if (status === 'done' && hasChanges) { + // Wait for filesystem to settle before declaring done + await waitForFilesystemQuiescence(options.cwd, 2000); } - // In build mode, don't allow completion while plan tasks remain - if (status === 'done' && 
options.task.includes('IMPLEMENTATION_PLAN.md')) { + // Stall detection: stop if no file changes for 2+ consecutive iterations + if (consecutiveIdleIterations >= 2 && i > 1) { + console.log( + chalk.yellow( + ` No file changes for ${consecutiveIdleIterations} consecutive iterations - stopping` + ) + ); + finalIteration = i; + exitReason = 'completed'; + break; + } + + // Don't allow completion while plan tasks remain (check plan file if it exists) + if (status === 'done') { const latestTaskInfo = parsePlanTasks(options.cwd); - if (latestTaskInfo.pending > 0) { + if (latestTaskInfo.total > 0 && latestTaskInfo.pending > 0) { console.log( chalk.yellow( ` Agent reported done but ${latestTaskInfo.pending} task(s) remain - continuing...` diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index ff928d6..5da90f3 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -144,7 +144,7 @@ export function getTaskByIndex(cwd: string, index: number): PlanTask | null { * - Buffer = max(2, pendingTasks * 0.3) - at least 2, or 30% extra for retries * - Minimum: 3 (even for small tasks) * - Maximum: 25 (prevent runaway loops) - * - If no plan: 10 (sensible default) + * - If no plan: 7 (sensible default) */ export function calculateOptimalIterations(cwd: string): { iterations: number; @@ -153,10 +153,10 @@ export function calculateOptimalIterations(cwd: string): { } { const taskCount = parsePlanTasks(cwd); - // No implementation plan - use default + // No implementation plan - use conservative default if (taskCount.total === 0) { return { - iterations: 10, + iterations: 7, taskCount, reason: 'No implementation plan found, using default', }; From a5f89a1e3d049613dbf237b84838dd93facadfc1 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 08:36:58 +0000 Subject: [PATCH 03/46] fix(loop): dynamic iteration calculation from spec content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When no 
IMPLEMENTATION_PLAN.md exists, estimate task count from the spec content by analyzing structural elements (headings, bullet points, numbered lists, checkboxes). This replaces the static default of 7 with a data-driven estimate. For the pet shop issue (#86): 4 headings + 12 bullets → ~5 iterations instead of the old static 10. Co-Authored-By: Claude Opus 4.6 --- src/commands/run.ts | 6 +++- src/loop/task-counter.ts | 59 ++++++++++++++++++++++++++++++++++++---- src/mcp/core/run.ts | 2 +- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index 384eb0f..1a2da38 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -560,7 +560,11 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN } // Calculate smart iterations based on tasks (always, unless explicitly overridden) - const { iterations: smartIterations, taskCount, reason } = calculateOptimalIterations(cwd); + const { + iterations: smartIterations, + taskCount, + reason, + } = calculateOptimalIterations(cwd, finalTask); if (!options.maxIterations && !preset?.maxIterations) { if (taskCount.total > 0) { console.log( diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index 5da90f3..ee8f13f 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -136,29 +136,78 @@ export function getTaskByIndex(cwd: string, index: number): PlanTask | null { return tasks[index] || null; } +/** + * Estimate task complexity from spec/task content when no plan file exists. + * Counts structural elements (headings, bullet points, numbered items) + * and maps them to an estimated task count. 
+ */ +export function estimateTasksFromContent(content: string): { estimated: number; reason: string } { + if (!content || content.length < 20) { + return { estimated: 0, reason: 'no content' }; + } + + const lines = content.split('\n'); + + // Count structural signals + const headings = lines.filter((l) => /^#{1,4}\s+/.test(l)).length; + const bullets = lines.filter((l) => /^\s*[-*]\s+/.test(l)).length; + const numbered = lines.filter((l) => /^\s*\d+[.)]\s+/.test(l)).length; + const checkboxes = lines.filter((l) => /^\s*[-*]\s*\[[ xX]\]/.test(l)).length; + + // If there are explicit checkboxes, use that count + if (checkboxes > 0) { + return { estimated: checkboxes, reason: `${checkboxes} checkboxes in spec` }; + } + + // Estimate from structural elements: headings define major tasks, + // dense bullet lists suggest subtasks within those + const majorTasks = Math.max(1, headings); + const detailItems = bullets + numbered; + + // Heuristic: ~4 detail items per iteration of work + const fromDetails = Math.ceil(detailItems / 4); + const estimated = Math.max(majorTasks, fromDetails, 1); + + return { + estimated, + reason: `estimated from spec (${headings} sections, ${bullets + numbered} items)`, + }; +} + /** * Calculate optimal number of loop iterations based on task count * * Formula: - * - If tasks exist: pendingTasks + buffer (for retries/validation fixes) + * - If plan exists: pendingTasks + buffer (for retries/validation fixes) * - Buffer = max(2, pendingTasks * 0.3) - at least 2, or 30% extra for retries + * - If no plan but spec content: estimate from spec structure * - Minimum: 3 (even for small tasks) * - Maximum: 25 (prevent runaway loops) - * - If no plan: 7 (sensible default) */ -export function calculateOptimalIterations(cwd: string): { +export function calculateOptimalIterations( + cwd: string, + taskContent?: string +): { iterations: number; taskCount: TaskCount; reason: string; } { const taskCount = parsePlanTasks(cwd); - // No implementation plan - 
use conservative default + // No implementation plan - estimate from spec content if available if (taskCount.total === 0) { + const estimate = taskContent ? estimateTasksFromContent(taskContent) : null; + if (estimate && estimate.estimated > 0) { + const buffer = Math.max(2, Math.ceil(estimate.estimated * 0.3)); + let iterations = estimate.estimated + buffer; + iterations = Math.max(3, iterations); + iterations = Math.min(15, iterations); + return { iterations, taskCount, reason: estimate.reason }; + } return { iterations: 7, taskCount, - reason: 'No implementation plan found, using default', + reason: 'No plan or spec structure found, using default', }; } diff --git a/src/mcp/core/run.ts b/src/mcp/core/run.ts index bda2585..852886f 100644 --- a/src/mcp/core/run.ts +++ b/src/mcp/core/run.ts @@ -122,7 +122,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN : `Ralph: ${finalTask.slice(0, 50)}`; // Calculate smart iterations based on tasks - const { iterations: smartIterations } = calculateOptimalIterations(cwd); + const { iterations: smartIterations } = calculateOptimalIterations(cwd, finalTask); const loopOptions: LoopOptions = { task: finalTask, From 942917009fe25dd2d11f309925c14a7fefa3309c Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 10:43:08 +0000 Subject: [PATCH 04/46] feat(loop): add Ralph Playbook prompt engineering to loop context - Add loop-aware preamble to every iteration with key Ralph Playbook language patterns: "study" not "read", "don't assume not implemented", "no placeholders or stubs", and AGENTS.md self-improvement - For unstructured specs (no task headers), instruct agent to create IMPLEMENTATION_PLAN.md as first action instead of generic "implement all features" prompt - Add spec file references in iterations 2+ so agent can re-read requirements from specs/ directory - Add plan-creation reminder for later iterations without structured tasks - Use playbook language in structured spec prompt 
too Co-Authored-By: Claude Opus 4.6 --- src/commands/run.ts | 28 +++++++++++++++++++---- src/loop/context-builder.ts | 44 +++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index 58ed02e..ebbff76 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -455,7 +455,7 @@ export async function runCommand( writeFileSync(implementationPlanPath, extractedPlan); console.log(chalk.cyan('Created IMPLEMENTATION_PLAN.md from spec')); - finalTask = `Build the following project based on this specification: + finalTask = `Study the following specification carefully: ${sourceSpec} @@ -463,13 +463,33 @@ ${sourceSpec} An IMPLEMENTATION_PLAN.md file has been created with tasks extracted from this spec. As you complete each task, mark it done by changing [ ] to [x] in IMPLEMENTATION_PLAN.md. -Focus on one task at a time.`; +Focus on ONE task at a time. Don't assume functionality is not already implemented — search the codebase first. +Implement completely — no placeholders or stubs.`; } else { - finalTask = `Build the following project based on this specification: + finalTask = `Study the following specification carefully: ${sourceSpec} -Analyze the specification and implement all required features. Create a proper project structure with all necessary files.`; +## Getting Started + +IMPORTANT: Before writing any code, you MUST first: +1. Study the specification above thoroughly +2. Search the codebase — don't assume functionality is not already implemented +3. Create an IMPLEMENTATION_PLAN.md file with tasks broken down as: + +### Task 1: [name] +- [ ] Subtask a +- [ ] Subtask b + +### Task 2: [name] +- [ ] Subtask a + +Break the spec into 3-8 logical tasks, sorted by priority. + +4. Then start working on Task 1 only. + +As you complete each subtask, mark it done by changing [ ] to [x] in IMPLEMENTATION_PLAN.md. +Focus on ONE task at a time. 
Implement completely — no placeholders or stubs.`; } console.log(chalk.cyan('Using fetched specification as task')); } diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 2504d0f..99ca43e 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -143,20 +143,44 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { const debugParts: string[] = []; let prompt: string; - // No structured tasks — just pass the task as-is + // Loop-aware preamble — gives the agent behavioral context per Ralph Playbook patterns + const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). + +Rules: +- Study IMPLEMENTATION_PLAN.md and work on ONE task at a time +- Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done +- Study specs/ directory for original requirements +- Don't assume functionality is not already implemented — search the codebase first +- Implement completely — no placeholders or stubs +- When ALL tasks are complete, explicitly state "All tasks completed" +- If you learn how to run/build the project, update AGENTS.md +`; + + // No structured tasks — pass the task with preamble if (!currentTask || totalTasks === 0) { - prompt = taskWithSkills; + if (iteration > 1) { + // Later iterations without structured tasks — remind agent to create a plan + prompt = `${preamble} +Continue working on the project. +If you haven't already, create an IMPLEMENTATION_PLAN.md with structured tasks. +Study the specs/ directory for the original specification. 
+ +${taskWithSkills}`; + } else { + prompt = `${preamble}\n${taskWithSkills}`; + } if (validationFeedback) { const compressed = compressValidationFeedback(validationFeedback); prompt = `${prompt}\n\n${compressed}`; } debugParts.push('mode=raw (no structured tasks)'); } else if (iteration === 1) { - // Iteration 1: Full context — spec + skills + full current task details + // Iteration 1: Full context — preamble + spec + skills + full current task details const taskNum = completedTasks + 1; const subtasksList = currentTask.subtasks?.map((st) => `- [ ] ${st.name}`).join('\n') || ''; - prompt = `${taskWithSkills} + prompt = `${preamble} +${taskWithSkills} ## Current Task (${taskNum}/${totalTasks}): ${currentTask.name} @@ -166,12 +190,13 @@ ${subtasksList} Complete these subtasks, then mark them done in IMPLEMENTATION_PLAN.md by changing [ ] to [x].`; debugParts.push('mode=full (iteration 1)'); - debugParts.push(`included: full spec + skills + task ${taskNum}/${totalTasks}`); + debugParts.push(`included: preamble + full spec + skills + task ${taskNum}/${totalTasks}`); } else if (iteration <= 3) { - // Iterations 2-3: Trimmed plan context + abbreviated spec reference + // Iterations 2-3: Preamble + trimmed plan context + spec reference const planContext = buildTrimmedPlanContext(currentTask, taskInfo); - prompt = `Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. + prompt = `${preamble} +Continue working on the project. Study specs/ for requirements if needed. Check IMPLEMENTATION_PLAN.md for full progress. ${planContext}`; @@ -185,10 +210,11 @@ ${planContext}`; debugParts.push(`mode=trimmed (iteration ${iteration})`); debugParts.push(`excluded: full spec, skills`); } else { - // Iterations 4+: Minimal context — just current task + // Iterations 4+: Preamble + minimal context const planContext = buildTrimmedPlanContext(currentTask, taskInfo); - prompt = `Continue working on the project. 
+ prompt = `${preamble} +Continue working on the project. Specs in specs/. Check IMPLEMENTATION_PLAN.md for progress. ${planContext}`; From d60f0afd84a1a0a10b98991f7e0bf880f85d3550 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 10:45:45 +0000 Subject: [PATCH 05/46] fix(ux): improve loop UX with ASCII art, smart directory, and calm warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Show Ralph ASCII art in run command via showWelcomeCompact() instead of plain text header - Smart project location: detect existing project markers (package.json, .git, Cargo.toml, etc.) and default to "Current directory" when found - Fix type:'list' → type:'select' for inquirer v13 compatibility in project location prompt (same bug fixed across 8 files previously) - Replace scary [WARNING] silence message with calm chalk.dim status: "Agent is thinking..." at 30s, "Still working..." at 60s Co-Authored-By: Claude Opus 4.6 --- src/commands/run.ts | 41 +++++++++++++++++++++++++++++++---------- src/loop/agents.ts | 20 ++++++++++++-------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index 58ed02e..7166164 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -20,6 +20,7 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; import { getSourceDefaults } from '../sources/config.js'; import { fetchFromSource } from '../sources/index.js'; +import { showWelcomeCompact } from '../wizard/ui.js'; /** Default fallback repo for GitHub issues when no project is specified */ const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas'; @@ -250,10 +251,7 @@ export async function runCommand( } } - console.log(); - console.log(chalk.cyan.bold('ralph-starter')); - console.log(chalk.dim('Ralph Wiggum made easy')); - console.log(); + showWelcomeCompact(); // Check for git 
repo if (options.commit || options.push || options.pr) { @@ -337,16 +335,39 @@ export async function runCommand( const isIntegrationSource = integrationSources.includes(options.from?.toLowerCase() || ''); if (isIntegrationSource && !options.auto && !options.outputDir) { + // Detect existing project markers to choose smart default ordering + const projectMarkers = [ + 'package.json', + '.git', + 'Cargo.toml', + 'go.mod', + 'pyproject.toml', + 'requirements.txt', + 'Gemfile', + 'pom.xml', + 'build.gradle', + ]; + const hasProjectMarkers = projectMarkers.some((f) => existsSync(join(cwd, f))); + + // If existing project detected, default to "Current directory" first + const choices = hasProjectMarkers + ? [ + { name: `Current directory (${cwd})`, value: 'current' }, + { name: 'Create new project folder', value: 'new' }, + { name: 'Enter custom path', value: 'custom' }, + ] + : [ + { name: 'Create new project folder', value: 'new' }, + { name: `Current directory (${cwd})`, value: 'current' }, + { name: 'Enter custom path', value: 'custom' }, + ]; + const { projectLocation } = await inquirer.prompt([ { - type: 'list', + type: 'select', name: 'projectLocation', message: 'Where do you want to run this task?', - choices: [ - { name: `Current directory (${cwd})`, value: 'current' }, - { name: 'Create new project folder', value: 'new' }, - { name: 'Enter custom path', value: 'custom' }, - ], + choices, }, ]); diff --git a/src/loop/agents.ts b/src/loop/agents.ts index e772cce..7640ace 100644 --- a/src/loop/agents.ts +++ b/src/loop/agents.ts @@ -163,20 +163,24 @@ export async function runAgent( let output = ''; let stdoutBuffer = ''; - // Track data timing for debugging and silence warnings + // Track data timing for debugging and silence notifications let lastDataTime = Date.now(); let silenceWarningShown = false; + let extendedSilenceShown = false; - // Warn if no data received for 30 seconds + // Notify if no data received for 30+ seconds (calm, non-alarming) const 
silenceChecker = setInterval(() => { const silentMs = Date.now() - lastDataTime; - if (silentMs > 30000 && !silenceWarningShown) { + if (silentMs > 60000 && !extendedSilenceShown) { + extendedSilenceShown = true; + console.log(chalk.dim(' Still working... Use RALPH_DEBUG=1 for verbose output.')); + } else if (silentMs > 30000 && !silenceWarningShown) { silenceWarningShown = true; - console.warn('\n[WARNING] No output from agent for 30+ seconds. Claude may be:'); - console.warn(' - Processing a complex task'); - console.warn(' - Stuck/rate limited'); - console.warn(' - Waiting for something'); - console.warn('Use RALPH_DEBUG=1 for detailed output\n'); + console.log( + chalk.dim( + '\n Agent is thinking... (no output for 30s, this is normal for complex tasks)' + ) + ); } }, 5000); From d81b73ffd12e5003a7bf5dc418e8589d031fdee6 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 10:47:43 +0000 Subject: [PATCH 06/46] fix(loop): remove iteration delay and fix validation feedback mutation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed: - Remove unnecessary 1-second sleep between loop iterations — saves ~1s per iteration (25s on a 25-iteration loop) Bug fix: - Fix validation feedback mutation that defeated context trimming. The executor was appending compressed errors to `taskWithSkills` (line 868), accumulating old validation errors across iterations. Now stores feedback in a separate variable and passes it through the context builder's existing `validationFeedback` parameter, which was previously passed as `undefined` (dead code). The context builder already handles per-iteration compression (2000 chars for iter 2-3, 500 for 4+). 
Co-Authored-By: Claude Opus 4.6 --- src/loop/executor.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..57e076f 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -23,7 +23,7 @@ import { } from '../utils/rate-limit-display.js'; import { type Agent, type AgentRunOptions, runAgent } from './agents.js'; import { CircuitBreaker, type CircuitBreakerConfig } from './circuit-breaker.js'; -import { buildIterationContext, compressValidationFeedback } from './context-builder.js'; +import { buildIterationContext } from './context-builder.js'; import { CostTracker, type CostTrackerStats, formatCost } from './cost-tracker.js'; import { estimateLoop, formatEstimateDetailed } from './estimator.js'; import { checkFileBasedCompletion, createProgressTracker, type ProgressEntry } from './progress.js'; @@ -430,6 +430,9 @@ export async function runLoop(options: LoopOptions): Promise { taskWithSkills = `${options.task}\n\n${skillsPrompt}`; } + // Track validation feedback separately — don't mutate taskWithSkills + let lastValidationFeedback = ''; + // Completion detection options const completionOptions: CompletionOptions = { completionPromise: options.completionPromise, @@ -623,7 +626,7 @@ export async function runLoop(options: LoopOptions): Promise { taskInfo, iteration: i, maxIterations, - validationFeedback: undefined, // Validation feedback handled separately below + validationFeedback: lastValidationFeedback || undefined, maxInputTokens: options.contextBudget || 0, }); const iterationTask = builtContext.prompt; @@ -863,14 +866,15 @@ export async function runLoop(options: LoopOptions): Promise { await progressTracker.appendEntry(progressEntry); } - // Continue loop with compressed validation feedback - const compressedFeedback = compressValidationFeedback(feedback); - taskWithSkills = `${taskWithSkills}\n\n${compressedFeedback}`; + // Pass validation feedback to context 
builder for next iteration + // (don't mutate taskWithSkills — that defeats context trimming) + lastValidationFeedback = feedback; continue; // Go to next iteration to fix issues } else { - // Validation passed - record success + // Validation passed - record success and clear feedback spinner.succeed(chalk.green(`Loop ${i}: Validation passed`)); circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; } } @@ -960,9 +964,6 @@ export async function runLoop(options: LoopOptions): Promise { `Iter ${i}/${maxIterations}${taskLabel}${costLabel} │ ${elapsedMin}m ${elapsedSec}s` ) ); - - // Small delay between iterations - await new Promise((resolve) => setTimeout(resolve, 1000)); } // Post-loop actions From c2111848ce508e08e490efb4ab5c90f5681782dc Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 13:40:53 +0000 Subject: [PATCH 07/46] fix(loop): fix progress status bug, deduplicate completion detection, improve error hashing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix progress entry always recording 'completed' even for non-done iterations (was ternary with identical branches). Now records 'partial' for iterations that didn't complete. - Merge detectCompletion() and getCompletionReason() into single-pass detectCompletionWithReason() to eliminate duplicate analyzeResponse() calls per iteration. - Remove unused _validationPassed variable. - Improve circuit breaker error hashing: only normalize file:line:col locations, timestamps, hex addresses, and stack traces — preserving semantically meaningful content so different errors (e.g. "port 8000 in use" vs "file not found") hash differently. - Add 'partial' status to ProgressEntry type with status badge. - Update circuit breaker tests for new normalization behavior. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/__tests__/circuit-breaker.test.ts | 42 +++++++++-- src/loop/circuit-breaker.ts | 12 +-- src/loop/executor.ts | 87 +++++++--------------- src/loop/progress.ts | 4 +- 4 files changed, 74 insertions(+), 71 deletions(-) diff --git a/src/loop/__tests__/circuit-breaker.test.ts b/src/loop/__tests__/circuit-breaker.test.ts index fe149b9..36f65db 100644 --- a/src/loop/__tests__/circuit-breaker.test.ts +++ b/src/loop/__tests__/circuit-breaker.test.ts @@ -181,18 +181,50 @@ describe('CircuitBreaker', () => { }); describe('error normalization', () => { - it('should treat similar errors with different numbers as the same', () => { + it('should treat errors with different file:line:col locations as the same', () => { const customBreaker = new CircuitBreaker({ maxConsecutiveFailures: 100, maxSameErrorCount: 3, }); - // These should hash to similar values due to number normalization - customBreaker.recordFailure('Error at line 42'); + // Same error at different file locations should hash identically + customBreaker.recordFailure('Error in src/index.ts:42:5'); customBreaker.recordSuccess(); - customBreaker.recordFailure('Error at line 99'); + customBreaker.recordFailure('Error in src/index.ts:99:12'); customBreaker.recordSuccess(); - expect(customBreaker.recordFailure('Error at line 123')).toBe(true); + expect(customBreaker.recordFailure('Error in src/index.ts:123:3')).toBe(true); + }); + + it('should treat semantically different errors as distinct', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + // Different error messages should NOT hash identically + customBreaker.recordFailure('port 8000 already in use'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('file not found: config.json'); + customBreaker.recordSuccess(); + // Third unique error — should NOT trip (only 1 of each) + expect(customBreaker.recordFailure('permission denied: 
/etc/shadow')).toBe(false); + }); + + it('should normalize stack traces', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + customBreaker.recordFailure('TypeError: cannot read property at Object.run (/src/a.ts:10:5)'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('TypeError: cannot read property at Object.run (/src/b.ts:20:3)'); + customBreaker.recordSuccess(); + expect( + customBreaker.recordFailure( + 'TypeError: cannot read property at Object.run (/src/c.ts:30:1)' + ) + ).toBe(true); }); }); }); diff --git a/src/loop/circuit-breaker.ts b/src/loop/circuit-breaker.ts index 6734823..35bc95a 100644 --- a/src/loop/circuit-breaker.ts +++ b/src/loop/circuit-breaker.ts @@ -35,17 +35,19 @@ export class CircuitBreaker { } /** - * Hash an error message to track similar errors + * Hash an error message to track similar errors. + * Normalizes variable parts (line numbers, timestamps, hex, stack traces) + * while preserving semantically meaningful content like error messages. 
*/ private hashError(error: string): string { - // Normalize the error by removing variable parts like line numbers, timestamps const normalized = error - .replace(/\d+/g, 'N') // Replace numbers - .replace(/0x[a-fA-F0-9]+/g, 'HEX') // Replace hex values + .replace(/0x[a-fA-F0-9]+/g, 'HEX') // Replace hex addresses .replace(/at\s+\S+\s+\(\S+:\d+:\d+\)/g, 'STACK') // Replace stack traces + .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, 'TIMESTAMP') // Replace timestamps (must run before :N:N, which would mangle HH:MM:SS) + .replace(/:\d+:\d+/g, ':N:N') // Replace file:line:col locations .toLowerCase() .trim() - .slice(0, 500); // Limit length + .slice(0, 500); return crypto.createHash('md5').update(normalized).digest('hex').slice(0, 8); } diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..b8b6923 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -235,49 +235,52 @@ interface CompletionOptions { minCompletionIndicators?: number; } -function detectCompletion( +/** + * Detect completion status AND reason in a single pass. + * Avoids running analyzeResponse() twice by combining detectCompletion + getCompletionReason. + */ +function detectCompletionWithReason( output: string, options: CompletionOptions = {} -): 'done' | 'blocked' | 'continue' { +): { status: 'done' | 'blocked' | 'continue'; reason: string } { const { completionPromise, requireExitSignal = false, minCompletionIndicators = 1 } = options; // 1. Check explicit completion promise first (highest priority) if (completionPromise && output.includes(completionPromise)) { - return 'done'; + return { status: 'done', reason: `Found completion promise: "${completionPromise}"` }; } // 2. Check for COMPLETE tag if (/COMPLETE<\/promise>/i.test(output)) { - return 'done'; + return { status: 'done', reason: 'Found COMPLETE marker' }; } // 3.
Use semantic analyzer for more nuanced detection (single call) const analysis = analyzeResponse(output); // Check for blocked status if (analysis.stuckScore >= 0.7 && analysis.confidence !== 'low') { - return 'blocked'; + return { status: 'blocked', reason: 'Semantic analysis detected stuck state' }; } // Check blocked markers (legacy support) const upperOutput = output.toUpperCase(); for (const marker of BLOCKED_MARKERS) { if (upperOutput.includes(marker.toUpperCase())) { - return 'blocked'; + return { status: 'blocked', reason: `Found blocked marker: "${marker}"` }; } } - // Check for explicit EXIT_SIGNAL + // Check for explicit EXIT_SIGNAL (single call) const hasExplicitSignal = hasExitSignal(output); // If exit signal is required, check for it if (requireExitSignal) { if (hasExplicitSignal && analysis.indicators.completion.length >= minCompletionIndicators) { - return 'done'; + return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; } - // Continue if no explicit signal if (!hasExplicitSignal) { - return 'continue'; + return { status: 'continue', reason: '' }; } } @@ -286,61 +289,26 @@ function detectCompletion( analysis.completionScore >= 0.7 && analysis.indicators.completion.length >= minCompletionIndicators ) { - return 'done'; + const indicators = analysis.indicators.completion.slice(0, 3); + return { + status: 'done', + reason: `Semantic analysis (${Math.round(analysis.completionScore * 100)}% confident): ${indicators.join(', ')}`, + }; } // Explicit exit signals always count if (hasExplicitSignal) { - return 'done'; + return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; } // Legacy marker support for (const marker of COMPLETION_MARKERS) { if (upperOutput.includes(marker.toUpperCase())) { - return 'done'; + return { status: 'done', reason: `Found completion marker: "${marker}"` }; } } - return 'continue'; -} - -/** - * Get human-readable reason for completion (UX 3) - */ -function getCompletionReason(output: string, options: 
CompletionOptions): string { - const { completionPromise } = options; - - // Check explicit completion promise first - if (completionPromise && output.includes(completionPromise)) { - return `Found completion promise: "${completionPromise}"`; - } - - // Check for COMPLETE tag - if (/COMPLETE<\/promise>/i.test(output)) { - return 'Found COMPLETE marker'; - } - - // Check for explicit EXIT_SIGNAL - if (hasExitSignal(output)) { - return 'Found EXIT_SIGNAL: true'; - } - - // Check completion markers - const upperOutput = output.toUpperCase(); - for (const marker of COMPLETION_MARKERS) { - if (upperOutput.includes(marker.toUpperCase())) { - return `Found completion marker: "${marker}"`; - } - } - - // Use semantic analysis - const analysis = analyzeResponse(output); - if (analysis.completionScore >= 0.7) { - const indicators = analysis.indicators.completion.slice(0, 3); - return `Semantic analysis (${Math.round(analysis.completionScore * 100)}% confident): ${indicators.join(', ')}`; - } - - return 'Task marked as complete by agent'; + return { status: 'continue', reason: '' }; } function summarizeChanges(output: string): string { @@ -685,8 +653,9 @@ export async function runLoop(options: LoopOptions): Promise { costTracker.recordIteration(options.task, result.output); } - // Check for completion using enhanced detection - let status = detectCompletion(result.output, completionOptions); + // Check for completion using enhanced detection (single-pass: status + reason) + const completionResult = detectCompletionWithReason(result.output, completionOptions); + let status = completionResult.status; // Verify completion - check if files were actually changed if (status === 'done' && i === 1) { @@ -798,7 +767,6 @@ export async function runLoop(options: LoopOptions): Promise { } // Run validation (backpressure) if enabled and there are changes - let _validationPassed = true; let validationResults: ValidationResult[] = []; if (validationCommands.length > 0 && (await 
hasUncommittedChanges(options.cwd))) { @@ -808,7 +776,6 @@ export async function runLoop(options: LoopOptions): Promise { const allPassed = validationResults.every((r) => r.success); if (!allPassed) { - _validationPassed = false; validationFailures++; const feedback = formatValidationFeedback(validationResults); spinner.fail(chalk.red(`Loop ${i}: Validation failed`)); @@ -905,7 +872,7 @@ export async function runLoop(options: LoopOptions): Promise { // Update progress entry if (progressTracker && progressEntry) { - progressEntry.status = status === 'done' ? 'completed' : 'completed'; + progressEntry.status = status === 'done' ? 'completed' : 'partial'; progressEntry.summary = summarizeChanges(result.output); progressEntry.validationResults = validationResults.length > 0 ? validationResults : undefined; @@ -923,7 +890,7 @@ export async function runLoop(options: LoopOptions): Promise { } if (status === 'done') { - const completionReason = getCompletionReason(result.output, completionOptions); + const completionReason = completionResult.reason || 'Task marked as complete by agent'; const duration = Date.now() - startTime; const minutes = Math.floor(duration / 60000); const seconds = Math.floor((duration % 60000) / 1000); diff --git a/src/loop/progress.ts b/src/loop/progress.ts index c0d6f04..e00e04a 100644 --- a/src/loop/progress.ts +++ b/src/loop/progress.ts @@ -6,7 +6,7 @@ import type { ValidationResult } from './validation.js'; export interface ProgressEntry { timestamp: string; iteration: number; - status: 'started' | 'completed' | 'failed' | 'blocked' | 'validation_failed'; + status: 'started' | 'completed' | 'partial' | 'failed' | 'blocked' | 'validation_failed'; summary: string; validationResults?: ValidationResult[]; commitHash?: string; @@ -86,6 +86,8 @@ function getStatusBadge(status: ProgressEntry['status']): string { return '🔄 Started'; case 'completed': return '✅ Completed'; + case 'partial': + return '🔶 Partial'; case 'failed': return '❌ Failed'; case 
'blocked': From 934059d71954d5847220f70d68ba5f2e9990c303 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 13:42:32 +0000 Subject: [PATCH 08/46] perf(loop): memoize parsePlanTasks with mtime cache, parallelize agent detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add mtime-based caching to parsePlanTasks() — the same IMPLEMENTATION_PLAN.md file was being read and regex-parsed 4 times per iteration (init, progress check, completion check, display). The cache returns the stored result if the file's mtimeMs hasn't changed, eliminating ~75 redundant file reads across a 25-iteration loop. - Parallelize agent detection in detectAvailableAgents() — each agent check spawns an independent subprocess (e.g. `claude --version`). Running them with Promise.all() instead of sequential for/of cuts startup time from ~2-3s to <1s. Co-Authored-By: Claude Opus 4.6 --- src/loop/agents.ts | 18 ++++++++---------- src/loop/task-counter.ts | 28 ++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/loop/agents.ts b/src/loop/agents.ts index e772cce..82fb482 100644 --- a/src/loop/agents.ts +++ b/src/loop/agents.ts @@ -63,21 +63,19 @@ export async function checkAgentAvailable(type: AgentType): Promise { } export async function detectAvailableAgents(): Promise { - const agents: Agent[] = []; + const entries = Object.entries(AGENTS).filter(([type]) => type !== 'unknown'); - for (const [type, config] of Object.entries(AGENTS)) { - if (type === 'unknown') continue; - - const available = await checkAgentAvailable(type as AgentType); - agents.push({ + // Check all agents in parallel — each spawns an independent subprocess + const results = await Promise.all( + entries.map(async ([type, config]) => ({ type: type as AgentType, name: config.name, command: config.command, - available, - }); - } + available: await checkAgentAvailable(type as AgentType), + })) + ); - return agents; + return 
results; } export async function detectBestAgent(): Promise { diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index ff928d6..7872816 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -1,6 +1,9 @@ -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, readFileSync, statSync } from 'node:fs'; import { join } from 'node:path'; +/** Mtime-based cache for parsePlanTasks to avoid redundant file reads within the same iteration */ +let _planCache: { path: string; mtimeMs: number; result: TaskCount } | null = null; + export interface PlanTask { name: string; completed: boolean; @@ -24,9 +27,20 @@ export function parsePlanTasks(cwd: string): TaskCount { const planPath = join(cwd, 'IMPLEMENTATION_PLAN.md'); if (!existsSync(planPath)) { + _planCache = null; return { total: 0, completed: 0, pending: 0, tasks: [] }; } + // Return cached result if file hasn't changed (avoids redundant reads within same iteration) + try { + const mtimeMs = statSync(planPath).mtimeMs; + if (_planCache && _planCache.path === planPath && _planCache.mtimeMs === mtimeMs) { + return _planCache.result; + } + } catch { + // stat failed — fall through to full parse + } + try { const content = readFileSync(planPath, 'utf-8'); const lines = content.split('\n'); @@ -109,13 +123,23 @@ export function parsePlanTasks(cwd: string): TaskCount { const completed = tasks.filter((t) => t.completed).length; const pending = tasks.filter((t) => !t.completed).length; - return { + const result: TaskCount = { total: tasks.length, completed, pending, tasks, }; + + // Cache result with mtime for subsequent calls within the same iteration + try { + _planCache = { path: planPath, mtimeMs: statSync(planPath).mtimeMs, result }; + } catch { + // stat failed — skip caching + } + + return result; } catch { + _planCache = null; return { total: 0, completed: 0, pending: 0, tasks: [] }; } } From 2689d9e821d58bda6daa0df78411e401e7e96c08 Mon Sep 17 00:00:00 2001 From: 
ruben-cytonic Date: Fri, 13 Feb 2026 13:46:13 +0000 Subject: [PATCH 09/46] feat(loop): add cost ceiling, run all validators, output size limit, configurable timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Safety: - Add maxCost option to CostTracker and LoopOptions — the loop checks isOverBudget() before each iteration and exits with 'cost_ceiling' reason if exceeded. Prevents unexpected charges on long-running loops. - Add output size limit (default 50MB) in agent runner — truncates to last 80% of buffer if exceeded, preventing OOM from verbose agent output. UX: - Run all validation commands instead of stopping at first failure — the agent now sees lint AND test AND build failures in a single pass, enabling multi-fix iterations instead of fix-one-rerun-fix-another chains. Configuration: - Add agentTimeout option to LoopOptions (default: 5 min) — propagated to agent runner's timeoutMs. Complex tasks can set longer timeouts. - Add 'cost_ceiling' to LoopResult exit reasons. - Add 'partial' status to ProgressEntry for non-done iterations. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/__tests__/cost-tracker.test.ts | 30 +++++++++++++++++++++++++ src/loop/__tests__/validation.test.ts | 5 +++-- src/loop/agents.ts | 25 +++++++++++++++++++-- src/loop/cost-tracker.ts | 15 +++++++++++++ src/loop/executor.ts | 22 +++++++++++++++++- src/loop/validation.ts | 9 +++----- 6 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src/loop/__tests__/cost-tracker.test.ts b/src/loop/__tests__/cost-tracker.test.ts index b955772..c946d55 100644 --- a/src/loop/__tests__/cost-tracker.test.ts +++ b/src/loop/__tests__/cost-tracker.test.ts @@ -233,6 +233,36 @@ describe('cost-tracker', () => { }); }); + describe('isOverBudget', () => { + it('should return null when no maxCost is set', () => { + tracker.recordIteration('input', 'output'); + expect(tracker.isOverBudget()).toBeNull(); + }); + + it('should return null when under budget', () => { + const budgetTracker = new CostTracker({ + model: 'claude-3-sonnet', + maxCost: 100, // $100 budget + }); + budgetTracker.recordIteration('input', 'output'); + expect(budgetTracker.isOverBudget()).toBeNull(); + }); + + it('should return budget info when over budget', () => { + const budgetTracker = new CostTracker({ + model: 'claude-3-sonnet', + maxCost: 0.0001, // Extremely low budget + }); + // Record enough iterations to exceed tiny budget + budgetTracker.recordIteration('a'.repeat(10000), 'b'.repeat(10000)); + + const result = budgetTracker.isOverBudget(); + expect(result).not.toBeNull(); + expect(result?.maxCost).toBe(0.0001); + expect(result?.currentCost).toBeGreaterThan(0); + }); + }); + describe('model pricing', () => { it('should use default pricing for unknown models', () => { const unknownTracker = new CostTracker({ model: 'unknown-model' }); diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts index f46a90c..d565522 100644 --- a/src/loop/__tests__/validation.test.ts +++ b/src/loop/__tests__/validation.test.ts @@ -228,7 +228,7 @@ 
describe('validation', () => { expect(results.every((r) => r.success)).toBe(true); }); - it('should stop on first failure', async () => { + it('should run all commands even when some fail', async () => { mockExeca .mockResolvedValueOnce({ exitCode: 0, stdout: 'Passed', stderr: '' } as any) .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: 'Failed' } as any) @@ -242,9 +242,10 @@ describe('validation', () => { const results = await runAllValidations('/test/dir', commands); - expect(results).toHaveLength(2); // Stopped after lint failed + expect(results).toHaveLength(3); // All commands run expect(results[0].success).toBe(true); expect(results[1].success).toBe(false); + expect(results[2].success).toBe(true); }); it('should handle empty command list', async () => { diff --git a/src/loop/agents.ts b/src/loop/agents.ts index e772cce..fb9979a 100644 --- a/src/loop/agents.ts +++ b/src/loop/agents.ts @@ -20,6 +20,10 @@ export interface AgentRunOptions { streamOutput?: boolean; /** Callback for each line of output */ onOutput?: (line: string) => void; + /** Agent timeout in milliseconds (default: 300000 = 5 min) */ + timeoutMs?: number; + /** Maximum output size in bytes before truncating (default: 50MB) */ + maxOutputBytes?: number; } const AGENTS: Record = { @@ -161,7 +165,10 @@ export async function runAgent( }); let output = ''; + let outputBytes = 0; let stdoutBuffer = ''; + let outputTruncated = false; + const maxOutputBytes = options.maxOutputBytes || 50 * 1024 * 1024; // Default 50MB // Track data timing for debugging and silence warnings let lastDataTime = Date.now(); @@ -180,8 +187,8 @@ export async function runAgent( } }, 5000); - // Timeout: 5 minutes for actual work - const timeoutMs = 300000; + // Configurable timeout (default: 5 minutes) + const timeoutMs = options.timeoutMs || 300000; const timeout = setTimeout(() => { clearInterval(silenceChecker); if (process.env.RALPH_DEBUG) { @@ -195,6 +202,20 @@ export async function runAgent( // Process stdout 
line-by-line for real-time updates proc.stdout?.on('data', (data: Buffer) => { const chunk = data.toString(); + outputBytes += data.byteLength; + + // Guard against unbounded memory growth — keep last portion if over limit + if (outputBytes > maxOutputBytes && !outputTruncated) { + outputTruncated = true; + const keepBytes = Math.floor(maxOutputBytes * 0.8); + output = output.slice(-keepBytes); + if (process.env.RALPH_DEBUG) { + console.error( + `[DEBUG] Output exceeded ${maxOutputBytes} bytes, truncating to last ${keepBytes}` + ); + } + } + output += chunk; stdoutBuffer += chunk; lastDataTime = Date.now(); diff --git a/src/loop/cost-tracker.ts b/src/loop/cost-tracker.ts index 97700af..bf88555 100644 --- a/src/loop/cost-tracker.ts +++ b/src/loop/cost-tracker.ts @@ -92,6 +92,8 @@ export interface CostTrackerStats { export interface CostTrackerConfig { model: string; maxIterations?: number; + /** Maximum cost in USD before the loop should stop (0 = unlimited) */ + maxCost?: number; } /** @@ -359,6 +361,19 @@ ${stats.totalCacheSavings > 0 ? `| Cache Savings | ${formatCost(stats.totalCache `; } + /** + * Check if accumulated cost exceeds the configured budget. + * Returns the budget and current total if over, null otherwise. 
+ */ + isOverBudget(): { maxCost: number; currentCost: number } | null { + if (!this.config.maxCost || this.config.maxCost <= 0) return null; + const total = this.iterations.reduce((sum, i) => sum + i.cost.totalCost, 0); + if (total >= this.config.maxCost) { + return { maxCost: this.config.maxCost, currentCost: total }; + } + return null; + } + /** * Get the last iteration's cost */ diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..ff335c8 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -190,6 +190,8 @@ export interface LoopOptions { trackCost?: boolean; // Track token usage and cost model?: string; // Model name for cost estimation contextBudget?: number; // Max input tokens per iteration (0 = unlimited) + maxCost?: number; // Maximum cost in USD before stopping (0 = unlimited) + agentTimeout?: number; // Agent timeout in milliseconds (default: 300000 = 5 min) } export interface LoopResult { @@ -203,7 +205,8 @@ export interface LoopResult { | 'max_iterations' | 'circuit_breaker' | 'rate_limit' - | 'file_signal'; + | 'file_signal' + | 'cost_ceiling'; stats?: { totalDuration: number; avgIterationDuration: number; @@ -416,6 +419,7 @@ export async function runLoop(options: LoopOptions): Promise { ? 
new CostTracker({ model: options.model || 'claude-3-sonnet', maxIterations: maxIterations, + maxCost: options.maxCost, }) : null; @@ -541,6 +545,21 @@ export async function runLoop(options: LoopOptions): Promise { } } + // Check cost ceiling before starting iteration + if (costTracker) { + const overBudget = costTracker.isOverBudget(); + if (overBudget) { + console.log( + chalk.red( + `\n Cost ceiling reached: ${formatCost(overBudget.currentCost)} >= ${formatCost(overBudget.maxCost)} budget` + ) + ); + finalIteration = i - 1; + exitReason = 'cost_ceiling'; + break; + } + } + // Log iteration warnings const progressPercent = (i / maxIterations) * 100; if (progressPercent >= 90 && progressPercent < 95) { @@ -653,6 +672,7 @@ export async function runLoop(options: LoopOptions): Promise { auto: options.auto, // maxTurns removed - was causing issues, match wizard behavior streamOutput: !!process.env.RALPH_DEBUG, // Show raw JSON when debugging + timeoutMs: options.agentTimeout, onOutput: (line: string) => { const step = detectStepFromOutput(line); if (step) { diff --git a/src/loop/validation.ts b/src/loop/validation.ts index 0f3962c..c419651 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -129,7 +129,9 @@ export async function runValidation( } /** - * Run all validation commands + * Run all validation commands. + * Runs every command regardless of individual failures — this gives the agent + * a complete picture of all issues, enabling multi-fix iterations. 
*/ export async function runAllValidations( cwd: string, @@ -140,11 +142,6 @@ export async function runAllValidations( for (const command of commands) { const result = await runValidation(cwd, command); results.push(result); - - // Stop on first failure - if (!result.success) { - break; - } } return results; From 30ed661c507b458d3d7b9a4c62d264e8300c3e51 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 15:30:43 +0000 Subject: [PATCH 10/46] fix(circuit-breaker): normalize timestamps before :line:col pattern Move timestamp regex before the :\d+:\d+ replacement. Previously, a timestamp like "14:07:39" would match :\d+:\d+ first, mangling it to "14:N:N" so the timestamp regex could never match. This caused same errors with different timestamps to hash differently. Co-Authored-By: Claude Opus 4.6 --- src/loop/__tests__/circuit-breaker.test.ts | 14 ++++++++++++++ src/loop/circuit-breaker.ts | 4 +++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/loop/__tests__/circuit-breaker.test.ts b/src/loop/__tests__/circuit-breaker.test.ts index 36f65db..3a299da 100644 --- a/src/loop/__tests__/circuit-breaker.test.ts +++ b/src/loop/__tests__/circuit-breaker.test.ts @@ -226,5 +226,19 @@ describe('CircuitBreaker', () => { ) ).toBe(true); }); + + it('should normalize timestamps correctly (before :line:col pattern)', () => { + const customBreaker = new CircuitBreaker({ + maxConsecutiveFailures: 100, + maxSameErrorCount: 3, + }); + + // Same error with different timestamps should hash identically + customBreaker.recordFailure('Error at 2026-02-13T14:07:39 in module'); + customBreaker.recordSuccess(); + customBreaker.recordFailure('Error at 2026-02-13T15:22:01 in module'); + customBreaker.recordSuccess(); + expect(customBreaker.recordFailure('Error at 2026-02-14T09:00:00 in module')).toBe(true); + }); }); }); diff --git a/src/loop/circuit-breaker.ts b/src/loop/circuit-breaker.ts index 35bc95a..26e1330 100644 --- a/src/loop/circuit-breaker.ts +++ 
b/src/loop/circuit-breaker.ts @@ -40,11 +40,13 @@ export class CircuitBreaker { * while preserving semantically meaningful content like error messages. */ private hashError(error: string): string { + // Order matters: timestamps must be normalized before :line:col, otherwise + // "14:07:39" in a timestamp matches :\d+:\d+ and gets mangled first. const normalized = error .replace(/0x[a-fA-F0-9]+/g, 'HEX') // Replace hex addresses .replace(/at\s+\S+\s+\(\S+:\d+:\d+\)/g, 'STACK') // Replace stack traces + .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, 'TIMESTAMP') // Replace timestamps (before :line:col) .replace(/:\d+:\d+/g, ':N:N') // Replace file:line:col locations - .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, 'TIMESTAMP') // Replace timestamps .toLowerCase() .trim() .slice(0, 500); From ac4740898381ae73f0459abaf06fa42be20dde06 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 15:31:38 +0000 Subject: [PATCH 11/46] fix(task-counter): add double-stat guard against TOCTOU race in plan cache The file could change between stat (cache check) and readFileSync. Now stat before and after reading: only cache if both mtimes match, preventing stale content from being cached with a new mtime. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/task-counter.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index 7872816..68b1d75 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -32,9 +32,10 @@ export function parsePlanTasks(cwd: string): TaskCount { } // Return cached result if file hasn't changed (avoids redundant reads within same iteration) + let preMtime = 0; try { - const mtimeMs = statSync(planPath).mtimeMs; - if (_planCache && _planCache.path === planPath && _planCache.mtimeMs === mtimeMs) { + preMtime = statSync(planPath).mtimeMs; + if (_planCache && _planCache.path === planPath && _planCache.mtimeMs === preMtime) { return _planCache.result; } } catch { @@ -130,9 +131,12 @@ export function parsePlanTasks(cwd: string): TaskCount { tasks, }; - // Cache result with mtime for subsequent calls within the same iteration + // Cache result only if file wasn't modified during parsing (double-stat guard) try { - _planCache = { path: planPath, mtimeMs: statSync(planPath).mtimeMs, result }; + const postMtime = statSync(planPath).mtimeMs; + if (postMtime === preMtime) { + _planCache = { path: planPath, mtimeMs: postMtime, result }; + } } catch { // stat failed — skip caching } From af96514ced007ee34bf6298bc30a000e11fe2e32 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 15:34:11 +0000 Subject: [PATCH 12/46] fix: make output truncation repeatable and include stderr in byte accounting Addresses PR #185 review feedback: - Remove outputTruncated flag so truncation can fire more than once - Reset outputBytes after truncation to prevent counter drift - Include stderr data in byte accounting Co-Authored-By: Claude Opus 4.6 --- src/loop/agents.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/loop/agents.ts b/src/loop/agents.ts index fb9979a..f67ede4 100644 --- a/src/loop/agents.ts +++ b/src/loop/agents.ts @@ -167,7 
+167,6 @@ export async function runAgent( let output = ''; let outputBytes = 0; let stdoutBuffer = ''; - let outputTruncated = false; const maxOutputBytes = options.maxOutputBytes || 50 * 1024 * 1024; // Default 50MB // Track data timing for debugging and silence warnings @@ -204,14 +203,15 @@ export async function runAgent( const chunk = data.toString(); outputBytes += data.byteLength; - // Guard against unbounded memory growth — keep last portion if over limit - if (outputBytes > maxOutputBytes && !outputTruncated) { - outputTruncated = true; + // Guard against unbounded memory growth — keep last portion if over limit. + // Repeatable: no flag gate, so output stays bounded even with continuous streaming. + if (outputBytes > maxOutputBytes) { const keepBytes = Math.floor(maxOutputBytes * 0.8); output = output.slice(-keepBytes); + outputBytes = Buffer.byteLength(output); // Reset counter to actual buffer size if (process.env.RALPH_DEBUG) { console.error( - `[DEBUG] Output exceeded ${maxOutputBytes} bytes, truncating to last ${keepBytes}` + `[DEBUG] Output exceeded ${maxOutputBytes} bytes, truncated to ~${outputBytes}` ); } } @@ -246,6 +246,7 @@ export async function runAgent( proc.stderr?.on('data', (data: Buffer) => { const chunk = data.toString(); + outputBytes += data.byteLength; // Include stderr in byte accounting output += chunk; // Debug: log stderr output if (process.env.RALPH_DEBUG) { From ca1852ccd31f7006afcec0dedd272115b28562b5 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 16:07:41 +0000 Subject: [PATCH 13/46] fix: use full Ralph ASCII art in run command instead of compact version The compact RALPH_WELCOME_SMALL looked out of place compared to the full RALPH_FULL art used in the wizard. Use showWelcome() consistently. 
Co-Authored-By: Claude Opus 4.6 --- src/commands/run.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index 7166164..de067d3 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -20,7 +20,7 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; import { getSourceDefaults } from '../sources/config.js'; import { fetchFromSource } from '../sources/index.js'; -import { showWelcomeCompact } from '../wizard/ui.js'; +import { showWelcome } from '../wizard/ui.js'; /** Default fallback repo for GitHub issues when no project is specified */ const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas'; @@ -251,7 +251,7 @@ export async function runCommand( } } - showWelcomeCompact(); + showWelcome(); // Check for git repo if (options.commit || options.push || options.pr) { From 33a10f2ff7cc2b11ab59edd8bc2902b6c6edbe6d Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 16:31:26 +0000 Subject: [PATCH 14/46] feat(loop): always-on build validation for greenfield projects Build validation (build + typecheck) now runs after every iteration regardless of the --validate flag. 
This catches broken builds early: - Missing file imports (components that don't exist yet) - PostCSS/Tailwind misconfiguration - TypeScript compilation errors Key changes: - Add detectBuildCommands() with AGENTS.md > package.json > tsc fallback - Add runBuildValidation() with 2-min timeout (vs 5-min for full) - Re-detect build commands per iteration for greenfield projects - Skip when --validate already covers build/typecheck (no double-run) - Add preamble rules: "create files before importing" + "verify compilation" Co-Authored-By: Claude Opus 4.6 --- src/loop/__tests__/validation.test.ts | 134 ++++++++++++++++++++++++++ src/loop/context-builder.ts | 26 ++++- src/loop/executor.ts | 91 ++++++++++++++++- src/loop/validation.ts | 89 +++++++++++++++++ 4 files changed, 333 insertions(+), 7 deletions(-) diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts index f46a90c..1abe455 100644 --- a/src/loop/__tests__/validation.test.ts +++ b/src/loop/__tests__/validation.test.ts @@ -1,9 +1,11 @@ import { existsSync, readFileSync } from 'node:fs'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import { + detectBuildCommands, detectValidationCommands, formatValidationFeedback, runAllValidations, + runBuildValidation, runValidation, type ValidationCommand, type ValidationResult, @@ -307,4 +309,136 @@ describe('validation', () => { expect(feedback).toContain('Output error'); }); }); + + describe('detectBuildCommands', () => { + it('should return empty array when no config files exist', () => { + mockExistsSync.mockReturnValue(false); + const commands = detectBuildCommands('/test/dir'); + expect(commands).toHaveLength(0); + }); + + it('should detect build command from AGENTS.md', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('AGENTS.md')); + mockReadFileSync.mockReturnValue('- **Build**: `pnpm run build`'); + + const commands = detectBuildCommands('/test/dir'); + + 
expect(commands).toHaveLength(1); + expect(commands[0].name).toBe('build'); + expect(commands[0].command).toBe('pnpm'); + expect(commands[0].args).toEqual(['run', 'build']); + }); + + it('should detect both build and typecheck from AGENTS.md', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('AGENTS.md')); + mockReadFileSync.mockReturnValue( + '- **Build**: `npm run build`\n- **Typecheck**: `npm run typecheck`' + ); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(2); + expect(commands.map((c) => c.name)).toEqual(['build', 'typecheck']); + }); + + it('should NOT include test or lint commands from package.json', () => { + mockExistsSync.mockImplementation((path: any) => path.toString().includes('package.json')); + mockReadFileSync.mockReturnValue( + JSON.stringify({ + scripts: { test: 'vitest', lint: 'eslint .', build: 'tsc', typecheck: 'tsc --noEmit' }, + }) + ); + + const commands = detectBuildCommands('/test/dir'); + const names = commands.map((c) => c.name); + + expect(names).not.toContain('test'); + expect(names).not.toContain('lint'); + expect(names).toContain('build'); + expect(names).toContain('typecheck'); + }); + + it('should fall back to npx tsc --noEmit for TypeScript projects without build script', () => { + mockExistsSync.mockImplementation((path: any) => { + if (path.toString().includes('tsconfig.json')) return true; + if (path.toString().includes('package.json')) return true; + return false; + }); + mockReadFileSync.mockReturnValue(JSON.stringify({ scripts: {} })); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(1); + expect(commands[0]).toEqual({ name: 'typecheck', command: 'npx', args: ['tsc', '--noEmit'] }); + }); + + it('should NOT use tsc fallback when build script exists', () => { + mockExistsSync.mockImplementation((path: any) => { + if (path.toString().includes('tsconfig.json')) return true; + if 
(path.toString().includes('package.json')) return true; + return false; + }); + mockReadFileSync.mockReturnValue(JSON.stringify({ scripts: { build: 'next build' } })); + + const commands = detectBuildCommands('/test/dir'); + + expect(commands).toHaveLength(1); + expect(commands[0].name).toBe('build'); + }); + }); + + describe('runBuildValidation', () => { + beforeEach(() => { + mockExeca.mockReset(); // Clear persistent mockResolvedValue from runAllValidations tests + }); + + it('should use 2-minute timeout', async () => { + mockExeca.mockResolvedValueOnce({ + exitCode: 0, + stdout: 'Built successfully', + stderr: '', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + await runBuildValidation('/test/dir', command); + + expect(mockExeca).toHaveBeenCalledWith('npm', ['run', 'build'], { + cwd: '/test/dir', + timeout: 120000, + reject: false, + }); + }); + + it('should return success on exit code 0', async () => { + mockExeca.mockResolvedValueOnce({ + exitCode: 0, + stdout: 'Built successfully', + stderr: '', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + const result = await runBuildValidation('/test/dir', command); + + expect(result.success).toBe(true); + }); + + it('should return failure with error on non-zero exit', async () => { + // Use same pattern as runValidation tests (which pass) + mockExeca.mockResolvedValueOnce({ + exitCode: 1, + stdout: 'Build output', + stderr: 'Cannot find module Testimonials', + } as any); + + const command: ValidationCommand = { name: 'build', command: 'npm', args: ['run', 'build'] }; + + // Verify mock is set up + expect(mockExeca).toBeDefined(); + + const result = await runBuildValidation('/test/dir', command); + + expect(result.success).toBe(false); + expect(result.error).toContain('Cannot find module'); + }); + }); }); diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 
2504d0f..0624786 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -143,9 +143,24 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { const debugParts: string[] = []; let prompt: string; + // Loop-aware preamble — gives the agent behavioral context + const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). + +Rules: +- Study IMPLEMENTATION_PLAN.md and work on ONE task at a time +- Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done +- Study specs/ directory for original requirements +- Don't assume functionality is not already implemented — search the codebase first +- Implement completely — no placeholders or stubs +- Create files before importing them — never import components or modules that don't exist yet +- After creating or modifying files, verify the project compiles by running the build or dev command +- When ALL tasks are complete, explicitly state "All tasks completed" +- If you learn how to run/build the project, update AGENTS.md +`; + // No structured tasks — just pass the task as-is if (!currentTask || totalTasks === 0) { - prompt = taskWithSkills; + prompt = `${preamble}\n${taskWithSkills}`; if (validationFeedback) { const compressed = compressValidationFeedback(validationFeedback); prompt = `${prompt}\n\n${compressed}`; @@ -156,7 +171,8 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { const taskNum = completedTasks + 1; const subtasksList = currentTask.subtasks?.map((st) => `- [ ] ${st.name}`).join('\n') || ''; - prompt = `${taskWithSkills} + prompt = `${preamble} +${taskWithSkills} ## Current Task (${taskNum}/${totalTasks}): ${currentTask.name} @@ -171,7 +187,8 @@ Complete these subtasks, then mark them done in IMPLEMENTATION_PLAN.md by changi // Iterations 2-3: Trimmed plan context + abbreviated spec reference const planContext = buildTrimmedPlanContext(currentTask, 
taskInfo); - prompt = `Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. + prompt = `${preamble} +Continue working on the project. Study specs/ for requirements if needed. Check IMPLEMENTATION_PLAN.md for full progress. ${planContext}`; @@ -188,7 +205,8 @@ ${planContext}`; // Iterations 4+: Minimal context — just current task const planContext = buildTrimmedPlanContext(currentTask, taskInfo); - prompt = `Continue working on the project. + prompt = `${preamble} +Continue working on the project. Specs in specs/. Check IMPLEMENTATION_PLAN.md for progress. ${planContext}`; diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..b3cb6dd 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -33,9 +33,11 @@ import { detectClaudeSkills, formatSkillsForPrompt } from './skills.js'; import { detectStepFromOutput } from './step-detector.js'; import { getCurrentTask, parsePlanTasks } from './task-counter.js'; import { + detectBuildCommands, detectValidationCommands, formatValidationFeedback, runAllValidations, + runBuildValidation, type ValidationResult, } from './validation.js'; @@ -422,6 +424,10 @@ export async function runLoop(options: LoopOptions): Promise { // Detect validation commands if validation is enabled const validationCommands = options.validate ? 
detectValidationCommands(options.cwd) : []; + // Always-on build validation (not gated by --validate flag) + // Re-detected inside the loop for greenfield projects where package.json appears mid-loop + let buildCommands = detectBuildCommands(options.cwd); + // Detect Claude Code skills const detectedSkills = detectClaudeSkills(options.cwd); let taskWithSkills = options.task; @@ -797,8 +803,88 @@ export async function runLoop(options: LoopOptions): Promise { }; } - // Run validation (backpressure) if enabled and there are changes - let _validationPassed = true; + // --- Always-on build validation --- + // Re-detect build commands if none found yet (greenfield: package.json may appear mid-loop) + if (buildCommands.length === 0) { + buildCommands = detectBuildCommands(options.cwd); + if (buildCommands.length > 0 && process.env.RALPH_DEBUG) { + console.error( + `[DEBUG] Build commands detected: ${buildCommands.map((c) => c.name).join(', ')}` + ); + } + } + + // Run build validation if commands available and not already covered by full validation + const buildCoveredByFullValidation = + options.validate && + validationCommands.some((vc) => vc.name === 'build' || vc.name === 'typecheck'); + + if ( + buildCommands.length > 0 && + !buildCoveredByFullValidation && + (await hasUncommittedChanges(options.cwd)) + ) { + spinner.start(chalk.yellow(`Loop ${i}: Running build check...`)); + + const buildResults: ValidationResult[] = []; + for (const cmd of buildCommands) { + buildResults.push(await runBuildValidation(options.cwd, cmd)); + } + const allBuildsPassed = buildResults.every((r) => r.success); + + if (!allBuildsPassed) { + validationFailures++; + const feedback = formatValidationFeedback(buildResults); + spinner.fail(chalk.red(`Loop ${i}: Build check failed`)); + + const failedSummaries: string[] = []; + for (const vr of buildResults) { + if (!vr.success) { + const errorText = vr.error || vr.output || ''; + const errorCount = (errorText.match(/error/gi) || []).length; + 
const hint = errorCount > 0 ? `${errorCount} errors` : 'failed'; + failedSummaries.push(`${vr.command} (${hint})`); + } + } + console.log(chalk.red(` ✗ ${failedSummaries.join(' │ ')}`)); + + const errorMsg = buildResults + .filter((r) => !r.success) + .map((r) => r.error?.slice(0, 200) || r.output?.slice(0, 200) || r.command) + .join('\n'); + const tripped = circuitBreaker.recordFailure(errorMsg); + + if (tripped) { + const reason = circuitBreaker.getTripReason(); + console.log(chalk.red(`Circuit breaker tripped: ${reason}`)); + if (progressTracker && progressEntry) { + progressEntry.status = 'failed'; + progressEntry.summary = `Circuit breaker tripped (build): ${reason}`; + progressEntry.validationResults = buildResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + finalIteration = i; + exitReason = 'circuit_breaker'; + break; + } + + if (progressTracker && progressEntry) { + progressEntry.status = 'validation_failed'; + progressEntry.summary = 'Build check failed'; + progressEntry.validationResults = buildResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + + const compressedFeedback = compressValidationFeedback(feedback); + taskWithSkills = `${taskWithSkills}\n\n${compressedFeedback}`; + continue; // Go to next iteration to fix build issues + } + spinner.succeed(chalk.green(`Loop ${i}: Build check passed`)); + } + + // Run full validation (backpressure) if enabled and there are changes let validationResults: ValidationResult[] = []; if (validationCommands.length > 0 && (await hasUncommittedChanges(options.cwd))) { @@ -808,7 +894,6 @@ export async function runLoop(options: LoopOptions): Promise { const allPassed = validationResults.every((r) => r.success); if (!allPassed) { - _validationPassed = false; validationFailures++; const feedback = formatValidationFeedback(validationResults); spinner.fail(chalk.red(`Loop ${i}: 
Validation failed`)); diff --git a/src/loop/validation.ts b/src/loop/validation.ts index 0f3962c..073799e 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -90,6 +90,95 @@ export function detectValidationCommands(cwd: string): ValidationCommand[] { return commands; } +/** + * Detect build-only commands for always-on build validation. + * Unlike detectValidationCommands(), this: + * 1. Only returns build/typecheck commands (not test/lint) + * 2. Has TypeScript fallback (npx tsc --noEmit) when no build script exists + * 3. Is designed to be called per-iteration (re-detects if package.json appears mid-loop) + */ +export function detectBuildCommands(cwd: string): ValidationCommand[] { + const commands: ValidationCommand[] = []; + + // Check AGENTS.md for build command + const agentsPath = join(cwd, 'AGENTS.md'); + if (existsSync(agentsPath)) { + const content = readFileSync(agentsPath, 'utf-8'); + + const buildMatch = content.match(/[-*]\s*\*?\*?build\*?\*?[:\s]+`([^`]+)`/i); + if (buildMatch) { + const parts = buildMatch[1].trim().split(/\s+/); + commands.push({ name: 'build', command: parts[0], args: parts.slice(1) }); + } + + const typecheckMatch = content.match(/[-*]\s*\*?\*?typecheck\*?\*?[:\s]+`([^`]+)`/i); + if (typecheckMatch) { + const parts = typecheckMatch[1].trim().split(/\s+/); + commands.push({ name: 'typecheck', command: parts[0], args: parts.slice(1) }); + } + } + + // Fallback to package.json + if (commands.length === 0) { + const packagePath = join(cwd, 'package.json'); + if (existsSync(packagePath)) { + try { + const pkg = JSON.parse(readFileSync(packagePath, 'utf-8')); + const scripts = pkg.scripts || {}; + + if (scripts.build) { + commands.push({ name: 'build', command: 'npm', args: ['run', 'build'] }); + } + if (scripts.typecheck) { + commands.push({ name: 'typecheck', command: 'npm', args: ['run', 'typecheck'] }); + } + } catch { + // Invalid package.json + } + } + } + + // TypeScript fallback: if no build/typecheck script but 
tsconfig.json exists + if (commands.length === 0) { + const tsconfigPath = join(cwd, 'tsconfig.json'); + if (existsSync(tsconfigPath)) { + commands.push({ name: 'typecheck', command: 'npx', args: ['tsc', '--noEmit'] }); + } + } + + return commands; +} + +/** + * Run a single build validation command with a shorter timeout. + */ +export async function runBuildValidation( + cwd: string, + command: ValidationCommand +): Promise { + try { + const result = await execa(command.command, command.args, { + cwd, + timeout: 120000, // 2 minute timeout (vs 5 min for full validation) + reject: false, + }); + + return { + success: result.exitCode === 0, + command: `${command.command} ${command.args.join(' ')}`, + output: result.stdout, + ...(result.exitCode !== 0 && { error: result.stderr || result.stdout }), + }; + } catch (error) { + return { + success: false, + command: `${command.command} ${command.args.join(' ')}`, + output: '', + error: error instanceof Error ? error.message : 'Unknown error', + }; + } +} + /** * Run a single validation command */ From 2b7b33fd831319e9e0fbc4ffba5891f0bc33a9a4 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 17:57:07 +0000 Subject: [PATCH 15/46] feat(loop): filesystem change detection, directory anchoring, greenfield skills - Add filesystem-based change detection as primary method (git-independent) - Add getHeadCommitHash() and hasIterationChanges() for git-based secondary detection - Remove hasChanges gate from build/full validation (unconditional after iter 1) - Relax stall detection threshold (3 idle + i > 3) - Add directory anchoring rule to preamble (prevent nested project dirs) - Strengthen Tailwind v4 rules with exact setup instructions - Enable skills auto-install by default for greenfield projects (no package.json) Co-Authored-By: Claude Opus 4.6 --- src/automation/git.ts | 37 +++++++++++++++++++ src/commands/run.ts | 4 ++- src/loop/context-builder.ts | 10 ++++++ src/loop/executor.ts | 71 
+++++++++++++++++++++++++++++++------ src/skills/auto-install.ts | 14 ++++++-- 5 files changed, 122 insertions(+), 14 deletions(-) diff --git a/src/automation/git.ts b/src/automation/git.ts index a2b8b37..9ee555b 100644 --- a/src/automation/git.ts +++ b/src/automation/git.ts @@ -12,6 +12,43 @@ export async function hasUncommittedChanges(cwd: string): Promise { } } +/** + * Get the current HEAD commit hash. + * Returns empty string if not a git repo or no commits yet. + */ +export async function getHeadCommitHash(cwd: string): Promise { + const git: SimpleGit = simpleGit({ baseDir: cwd }); + try { + const hash = await git.revparse(['HEAD']); + return hash.trim(); + } catch { + return ''; + } +} + +/** + * Detect whether an iteration produced any changes — either uncommitted + * working tree changes OR new commits since `startHash`. + * + * This is critical because agents like Claude Code auto-commit during their + * run. If we only check `git status`, committed work looks like "no changes" + * and trips the stall detector / skips build validation. + */ +export async function hasIterationChanges(cwd: string, startHash: string): Promise { + // First check: uncommitted changes (staged or unstaged) + if (await hasUncommittedChanges(cwd)) { + return true; + } + + // Second check: new commits since iteration start + if (!startHash) return false; + + const currentHash = await getHeadCommitHash(cwd); + if (!currentHash) return false; + + return currentHash !== startHash; +} + export async function gitCommit(cwd: string, message: string): Promise { const git: SimpleGit = simpleGit({ baseDir: cwd }); diff --git a/src/commands/run.ts b/src/commands/run.ts index 41a82d9..8c432a7 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -584,7 +584,9 @@ Focus on one task at a time. 
After completing a task, update IMPLEMENTATION_PLAN } // Auto-install relevant skills from skills.sh (if available) - await autoInstallSkillsFromTask(finalTask, cwd); + // Enable by default for greenfield projects (no package.json yet) + const isLikelyGreenfield = !existsSync(join(cwd, 'package.json')); + await autoInstallSkillsFromTask(finalTask, cwd, { forceEnable: isLikelyGreenfield }); // Apply preset if specified let preset: PresetConfig | undefined; diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 55c6d20..dfdd51c 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -147,6 +147,7 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). Rules: +- IMPORTANT: The current working directory IS the project root. Create ALL files here — do NOT create a subdirectory for the project (e.g., do NOT run \`mkdir my-app\` or \`npx create-vite my-app\`). If you use a scaffolding tool, run it with \`.\` as the target (e.g., \`npm create vite@latest . -- --template react\`). - Study IMPLEMENTATION_PLAN.md and work on ONE task at a time - Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done - Study specs/ directory for original requirements @@ -156,6 +157,15 @@ Rules: - After creating or modifying files, verify the project compiles by running the build or dev command - When ALL tasks are complete, explicitly state "All tasks completed" - If you learn how to run/build the project, update AGENTS.md + +Technology gotchas (CRITICAL — follow these exactly): +- Tailwind CSS v4 (current version): The setup has changed significantly from v3. 
+ * Install: \`npm install tailwindcss @tailwindcss/postcss postcss\` + * postcss.config.js must use: \`plugins: { '@tailwindcss/postcss': {} }\` (NOT \`tailwindcss\`) + * CSS file must use: \`@import "tailwindcss";\` (NOT \`@tailwind base/components/utilities\` — those are v3 directives) + * Do NOT create tailwind.config.js — Tailwind v4 uses CSS-based configuration +- JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). +- Run the dev server or build command to verify the project works before marking setup tasks complete. `; // No structured tasks — pass the task with preamble diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 7139a3b..202f9d6 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -7,8 +7,10 @@ import { formatPrBody, generateSemanticPrTitle, getCurrentBranch, + getHeadCommitHash, gitCommit, gitPush, + hasIterationChanges, hasUncommittedChanges, type IssueRef, type SemanticPrType, @@ -141,6 +143,43 @@ async function getLatestMtime(dir: string): Promise { return latestMtime; } +/** + * Filesystem snapshot for git-independent change detection. + * Counts files and total bytes, skipping node_modules/.git/hidden dirs. 
+ */ +async function getFilesystemSnapshot( + dir: string +): Promise<{ fileCount: number; totalSize: number }> { + let fileCount = 0; + let totalSize = 0; + + async function walk(currentDir: string): Promise { + try { + const entries = await readdir(currentDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.name.startsWith('.') || entry.name === 'node_modules') continue; + const fullPath = join(currentDir, entry.name); + try { + const stats = await stat(fullPath); + if (entry.isDirectory()) { + await walk(fullPath); + } else { + fileCount++; + totalSize += stats.size; + } + } catch { + // File may have been deleted during walk + } + } + } catch { + // Directory unreadable + } + } + + await walk(dir); + return { fileCount, totalSize }; +} + /** * Wait for filesystem to settle (no new writes) */ @@ -469,6 +508,9 @@ export async function runLoop(options: LoopOptions): Promise { // Track completed tasks to show progress diff between iterations let previousCompletedTasks = initialTaskCount.completed; + // Filesystem snapshot for git-independent change detection + let previousSnapshot = await getFilesystemSnapshot(options.cwd); + for (let i = 1; i <= maxIterations; i++) { const iterationStart = Date.now(); @@ -647,6 +689,9 @@ export async function runLoop(options: LoopOptions): Promise { // Run the agent with step detection (include skills in task) // NOTE: Don't use maxTurns - it can cause issues. Let agent complete naturally. 
+ // Snapshot HEAD before agent runs — used to detect commits made during iteration + const iterationStartHash = await getHeadCommitHash(options.cwd); + const agentOptions: AgentRunOptions = { task: iterationTask, cwd: options.cwd, @@ -691,7 +736,16 @@ export async function runLoop(options: LoopOptions): Promise { let status = completionResult.status; // Track file changes between iterations for stall detection - const hasChanges = await hasUncommittedChanges(options.cwd); + // Primary: filesystem snapshot (works without git) + // Secondary: git-based detection (catches committed changes when git available) + const currentSnapshot = await getFilesystemSnapshot(options.cwd); + const fsChanged = + currentSnapshot.fileCount !== previousSnapshot.fileCount || + currentSnapshot.totalSize !== previousSnapshot.totalSize; + const gitChanged = await hasIterationChanges(options.cwd, iterationStartHash); + const hasChanges = fsChanged || gitChanged; + previousSnapshot = currentSnapshot; + if (!hasChanges) { consecutiveIdleIterations++; } else { @@ -711,7 +765,7 @@ export async function runLoop(options: LoopOptions): Promise { } // Stall detection: stop if no file changes for 2+ consecutive iterations - if (consecutiveIdleIterations >= 2 && i > 1) { + if (consecutiveIdleIterations >= 3 && i > 3) { console.log( chalk.yellow( ` No file changes for ${consecutiveIdleIterations} consecutive iterations - stopping` @@ -834,11 +888,7 @@ export async function runLoop(options: LoopOptions): Promise { options.validate && validationCommands.some((vc) => vc.name === 'build' || vc.name === 'typecheck'); - if ( - buildCommands.length > 0 && - !buildCoveredByFullValidation && - (await hasUncommittedChanges(options.cwd)) - ) { + if (buildCommands.length > 0 && !buildCoveredByFullValidation && i > 1) { spinner.start(chalk.yellow(`Loop ${i}: Running build check...`)); const buildResults: ValidationResult[] = []; @@ -892,8 +942,9 @@ export async function runLoop(options: LoopOptions): Promise { 
await progressTracker.appendEntry(progressEntry); } - const compressedFeedback = compressValidationFeedback(feedback); - taskWithSkills = `${taskWithSkills}\n\n${compressedFeedback}`; + // Pass build feedback to context builder for next iteration + // (don't mutate taskWithSkills — that defeats context trimming) + lastValidationFeedback = feedback; continue; // Go to next iteration to fix build issues } spinner.succeed(chalk.green(`Loop ${i}: Build check passed`)); @@ -905,7 +956,7 @@ export async function runLoop(options: LoopOptions): Promise { const warmupThreshold = options.validationWarmup ?? 0; const pastWarmup = completedTasks >= warmupThreshold; - if (validationCommands.length > 0 && pastWarmup && (await hasUncommittedChanges(options.cwd))) { + if (validationCommands.length > 0 && pastWarmup && i > 1) { spinner.start(chalk.yellow(`Loop ${i}: Running validation...`)); validationResults = await runAllValidations(options.cwd, validationCommands); diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 6697dc3..0dffbcc 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -127,10 +127,18 @@ async function installSkill(candidate: SkillCandidate, globalInstall: boolean): } } -export async function autoInstallSkillsFromTask(task: string, cwd: string): Promise { +export async function autoInstallSkillsFromTask( + task: string, + cwd: string, + options?: { forceEnable?: boolean } +): Promise { if (!task.trim()) return []; - const autoInstallEnabled = process.env.RALPH_ENABLE_SKILL_AUTO_INSTALL === '1'; - if (!autoInstallEnabled || process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return []; + // Explicit disable always wins + if (process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return []; + // Enable if: env var set, OR caller opts in (greenfield projects) + const autoInstallEnabled = + process.env.RALPH_ENABLE_SKILL_AUTO_INSTALL === '1' || options?.forceEnable === true; + if (!autoInstallEnabled) return []; const 
queries = buildSkillQueries(task); if (queries.length === 0) return []; From a16533a3290ecca5b6de216e8a8016adc80c71dc Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 18:00:38 +0000 Subject: [PATCH 16/46] refactor(loop): high-value fixes from code review context-builder.ts: - Fix wasTrimmed bug (was always true for iterations > 1) - Replace unsafe prompt.slice() with semantic trimming at paragraph boundaries - Section-aware feedback compression (keep first complete section, summarize rest) task-counter.ts: - Protect cache from consumer mutation via deep-clone - Extract MAX_ESTIMATED_ITERATIONS constant (was magic number 25) task-executor.ts: - Don't cascade previousBranch on failure (prevents branching from broken state) - Populate result.cost from loop cost stats (was dead field) executor.ts: - Task-aware stall detection (reset idle counter on task progress, not just file changes) - Post-iteration cost ceiling check (prevents starting expensive iteration over budget) - Reorder completion detection: cheap checks first, expensive semantic analysis last Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 40 ++++++++++++++++-- src/loop/executor.ts | 83 +++++++++++++++++++++++-------------- src/loop/task-counter.ts | 20 ++++++++- src/loop/task-executor.ts | 9 ++-- 4 files changed, 110 insertions(+), 42 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index dfdd51c..359391f 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -63,9 +63,29 @@ export function compressValidationFeedback(feedback: string, maxChars: number = const lines = stripped.split('\n'); const compressed: string[] = ['## Validation Failed\n']; let currentLength = compressed[0].length; + let sectionCount = 0; + let totalSections = 0; + // Count total ### sections for the omission summary for (const line of lines) { - // Always include headers (### command name) + if (line.startsWith('### ')) totalSections++; + } 
+ + for (const line of lines) { + // Track section headers (### command name) + if (line.startsWith('### ')) { + // If we already have one complete section and are over budget, stop + if (sectionCount >= 1 && currentLength + line.length + 1 > maxChars - 100) { + const remaining = totalSections - sectionCount; + if (remaining > 0) { + compressed.push(`\n[${remaining} more failing section(s) omitted]`); + } + break; + } + sectionCount++; + } + + // Always include ## and ### headers if (line.startsWith('### ') || line.startsWith('## ')) { compressed.push(line); currentLength += line.length + 1; @@ -142,6 +162,7 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { const completedTasks = taskInfo.completed; const debugParts: string[] = []; let prompt: string; + let wasTrimmed = false; // Loop-aware preamble — gives the agent behavioral context per Ralph Playbook patterns const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). 
@@ -219,6 +240,7 @@ ${planContext}`; debugParts.push('included: compressed validation feedback'); } + wasTrimmed = true; debugParts.push(`mode=trimmed (iteration ${iteration})`); debugParts.push(`excluded: full spec, skills`); } else { @@ -237,19 +259,29 @@ ${planContext}`; debugParts.push('included: minimal validation feedback (500 chars)'); } + wasTrimmed = true; debugParts.push(`mode=minimal (iteration ${iteration})`); debugParts.push('excluded: spec, skills, plan history'); } // Apply token budget if set - let wasTrimmed = iteration > 1 && currentTask !== null && totalTasks > 0; const estimatedTokens = estimateTokens(prompt); if (maxInputTokens > 0 && estimatedTokens > maxInputTokens) { - // Aggressively trim: truncate the prompt to fit budget + // Semantic trimming: cut at paragraph/line boundaries instead of mid-instruction const targetChars = maxInputTokens * 3.5; // rough chars-per-token if (prompt.length > targetChars) { - prompt = `${prompt.slice(0, targetChars)}\n\n[Context truncated to fit ${maxInputTokens} token budget]`; + // Find the last paragraph break before the budget + let cutPoint = prompt.lastIndexOf('\n\n', targetChars); + if (cutPoint < targetChars * 0.5) { + // No paragraph break in the second half — fall back to last line break + cutPoint = prompt.lastIndexOf('\n', targetChars); + } + if (cutPoint < targetChars * 0.5) { + // No suitable break found — hard cut (rare edge case) + cutPoint = targetChars; + } + prompt = `${prompt.slice(0, cutPoint)}\n\n[Context truncated to fit ${maxInputTokens} token budget]`; wasTrimmed = true; debugParts.push(`truncated: ${estimatedTokens} -> ~${maxInputTokens} tokens`); } diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 202f9d6..864fb38 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -289,46 +289,58 @@ function detectCompletionWithReason( ): { status: 'done' | 'blocked' | 'continue'; reason: string } { const { completionPromise, requireExitSignal = false, 
minCompletionIndicators = 1 } = options; - // 1. Check explicit completion promise first (highest priority) + // --- Cheap checks first (string includes / simple regex) --- + + // 1. Explicit completion promise (highest priority) if (completionPromise && output.includes(completionPromise)) { return { status: 'done', reason: `Found completion promise: "${completionPromise}"` }; } - // 2. Check for COMPLETE tag + // 2. COMPLETE tag if (/COMPLETE<\/promise>/i.test(output)) { return { status: 'done', reason: 'Found COMPLETE marker' }; } - // 3. Use semantic analyzer for more nuanced detection (single call) - const analysis = analyzeResponse(output); - - // Check for blocked status - if (analysis.stuckScore >= 0.7 && analysis.confidence !== 'low') { - return { status: 'blocked', reason: 'Semantic analysis detected stuck state' }; + // 3. Explicit EXIT_SIGNAL (cheap regex) + const hasExplicitSignal = hasExitSignal(output); + if (hasExplicitSignal && !requireExitSignal) { + return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; } - // Check blocked markers (legacy support) + // 4. Legacy completion markers (cheap string search) const upperOutput = output.toUpperCase(); + if (!requireExitSignal) { + for (const marker of COMPLETION_MARKERS) { + if (upperOutput.includes(marker.toUpperCase())) { + return { status: 'done', reason: `Found completion marker: "${marker}"` }; + } + } + } + + // 5. 
Blocked markers (cheap string search) for (const marker of BLOCKED_MARKERS) { if (upperOutput.includes(marker.toUpperCase())) { return { status: 'blocked', reason: `Found blocked marker: "${marker}"` }; } } - // Check for explicit EXIT_SIGNAL (single call) - const hasExplicitSignal = hasExitSignal(output); + // --- Expensive check last (semantic analysis with many regex patterns) --- - // If exit signal is required, check for it + const analysis = analyzeResponse(output); + + if (analysis.stuckScore >= 0.7 && analysis.confidence !== 'low') { + return { status: 'blocked', reason: 'Semantic analysis detected stuck state' }; + } + + // When exit signal is required, validate it with semantic indicators if (requireExitSignal) { if (hasExplicitSignal && analysis.indicators.completion.length >= minCompletionIndicators) { - return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; - } - if (!hasExplicitSignal) { - return { status: 'continue', reason: '' }; + return { status: 'done', reason: 'Found EXIT_SIGNAL: true with completion indicators' }; } + return { status: 'continue', reason: '' }; } - // Check completion indicators + // Semantic completion detection (only reached when no explicit markers matched) if ( analysis.completionScore >= 0.7 && analysis.indicators.completion.length >= minCompletionIndicators @@ -340,18 +352,6 @@ function detectCompletionWithReason( }; } - // Explicit exit signals always count - if (hasExplicitSignal) { - return { status: 'done', reason: 'Found EXIT_SIGNAL: true' }; - } - - // Legacy marker support - for (const marker of COMPLETION_MARKERS) { - if (upperOutput.includes(marker.toUpperCase())) { - return { status: 'done', reason: `Found completion marker: "${marker}"` }; - } - } - return { status: 'continue', reason: '' }; } @@ -729,6 +729,19 @@ export async function runLoop(options: LoopOptions): Promise { // Track cost for this iteration (silent - summary shown at end) if (costTracker) { costTracker.recordIteration(options.task, 
result.output); + + // Post-iteration cost ceiling check — prevent starting another expensive iteration + const overBudget = costTracker.isOverBudget(); + if (overBudget) { + console.log( + chalk.red( + `\n Cost ceiling reached after iteration ${i}: ${formatCost(overBudget.currentCost)} >= ${formatCost(overBudget.maxCost)} budget` + ) + ); + finalIteration = i; + exitReason = 'cost_ceiling'; + break; + } } // Check for completion using enhanced detection (single-pass: status + reason) @@ -746,7 +759,13 @@ export async function runLoop(options: LoopOptions): Promise { const hasChanges = fsChanged || gitChanged; previousSnapshot = currentSnapshot; - if (!hasChanges) { + // Task-aware stall detection: check both file changes AND task progress + // Re-parse tasks after agent runs to catch newly completed tasks + const postIterationTaskInfo = parsePlanTasks(options.cwd); + const tasksProgressedThisIteration = postIterationTaskInfo.completed > previousCompletedTasks; + const hasProductiveProgress = hasChanges || tasksProgressedThisIteration; + + if (!hasProductiveProgress) { consecutiveIdleIterations++; } else { consecutiveIdleIterations = 0; @@ -764,11 +783,11 @@ export async function runLoop(options: LoopOptions): Promise { await waitForFilesystemQuiescence(options.cwd, 2000); } - // Stall detection: stop if no file changes for 2+ consecutive iterations + // Stall detection: stop if no productive progress for 3+ consecutive iterations if (consecutiveIdleIterations >= 3 && i > 3) { console.log( chalk.yellow( - ` No file changes for ${consecutiveIdleIterations} consecutive iterations - stopping` + ` No progress for ${consecutiveIdleIterations} consecutive iterations - stopping` ) ); finalIteration = i; diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index 523c8c5..0c8d8aa 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -1,9 +1,25 @@ import { existsSync, readFileSync, statSync } from 'node:fs'; import { join } from 'node:path'; 
+/** Maximum iterations for estimated calculations */ +export const MAX_ESTIMATED_ITERATIONS = 25; + /** Mtime-based cache for parsePlanTasks to avoid redundant file reads within the same iteration */ let _planCache: { path: string; mtimeMs: number; result: TaskCount } | null = null; +/** Deep-clone a TaskCount to prevent cache mutation by consumers */ +function cloneTaskCount(tc: TaskCount): TaskCount { + return { + total: tc.total, + completed: tc.completed, + pending: tc.pending, + tasks: tc.tasks.map((t) => ({ + ...t, + subtasks: t.subtasks?.map((st) => ({ ...st })), + })), + }; +} + export interface PlanTask { name: string; completed: boolean; @@ -36,7 +52,7 @@ export function parsePlanTasks(cwd: string): TaskCount { try { preMtime = statSync(planPath).mtimeMs; if (_planCache && _planCache.path === planPath && _planCache.mtimeMs === preMtime) { - return _planCache.result; + return cloneTaskCount(_planCache.result); } } catch { // stat failed — fall through to full parse @@ -256,7 +272,7 @@ export function calculateOptimalIterations( // Apply bounds iterations = Math.max(3, iterations); // Minimum 3 - iterations = Math.min(25, iterations); // Maximum 25 + iterations = Math.min(MAX_ESTIMATED_ITERATIONS, iterations); return { iterations, diff --git a/src/loop/task-executor.ts b/src/loop/task-executor.ts index 1e88029..ed3db31 100644 --- a/src/loop/task-executor.ts +++ b/src/loop/task-executor.ts @@ -140,7 +140,9 @@ export async function executeTaskBatch(options: TaskExecutionOptions): Promise Date: Fri, 13 Feb 2026 18:34:12 +0000 Subject: [PATCH 17/46] fix(skills): replace broken CLI search with skills.sh HTTP API npx skills find is an interactive fzf UI that returns garbage when piped programmatically. Replace with skills.sh search API (https://skills.sh/api/search) which returns real repos with install counts. Enable auto-install by default. 
Co-Authored-By: Claude Opus 4.6 --- docs/docs/cli/skill.md | 14 ++--- src/commands/run.ts | 6 +- src/commands/skill.ts | 4 +- src/skills/auto-install.ts | 117 ++++++++++++++++++++++++------------- 4 files changed, 84 insertions(+), 57 deletions(-) diff --git a/docs/docs/cli/skill.md b/docs/docs/cli/skill.md index 25f5c71..e3edbf8 100644 --- a/docs/docs/cli/skill.md +++ b/docs/docs/cli/skill.md @@ -125,17 +125,11 @@ and included in the agent's prompt context when relevant. ## Auto Skill Discovery -Auto skill discovery is opt-in. When enabled, ralph-starter -queries the skills.sh registry to find and install relevant -skills automatically. +Auto skill discovery is enabled by default. ralph-starter +queries the skills.sh API to find and install relevant +skills automatically before each run. -Enable it by setting: - -```bash -RALPH_ENABLE_SKILL_AUTO_INSTALL=1 -``` - -You can also force-disable it with: +To disable it, set: ```bash RALPH_DISABLE_SKILL_AUTO_INSTALL=1 diff --git a/src/commands/run.ts b/src/commands/run.ts index 8c432a7..e836f0e 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -583,10 +583,8 @@ Focus on one task at a time. 
After completing a task, update IMPLEMENTATION_PLAN return; } - // Auto-install relevant skills from skills.sh (if available) - // Enable by default for greenfield projects (no package.json yet) - const isLikelyGreenfield = !existsSync(join(cwd, 'package.json')); - await autoInstallSkillsFromTask(finalTask, cwd, { forceEnable: isLikelyGreenfield }); + // Auto-install relevant skills from skills.sh (enabled by default) + await autoInstallSkillsFromTask(finalTask, cwd); // Apply preset if specified let preset: PresetConfig | undefined; diff --git a/src/commands/skill.ts b/src/commands/skill.ts index ecfdf58..c2f3453 100644 --- a/src/commands/skill.ts +++ b/src/commands/skill.ts @@ -9,7 +9,7 @@ interface SkillOptions { global?: boolean; } -interface SkillEntry { +export interface SkillEntry { name: string; description: string; category: string; @@ -17,7 +17,7 @@ interface SkillEntry { } // Popular skills registry (curated list) -const POPULAR_SKILLS: SkillEntry[] = [ +export const POPULAR_SKILLS: SkillEntry[] = [ // Agents { name: 'vercel-labs/agent-skills', diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 0dffbcc..3c08fff 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -1,18 +1,29 @@ import chalk from 'chalk'; -import { execa } from 'execa'; import ora from 'ora'; +import { POPULAR_SKILLS } from '../commands/skill.js'; import { findSkill } from '../loop/skills.js'; export interface SkillCandidate { fullName: string; // owner/repo@skill repo: string; skill: string; + installs: number; score: number; } const MAX_SKILLS_TO_INSTALL = 2; +const SKILLS_API_URL = 'https://skills.sh/api/search'; const SKILLS_CLI = 'skills'; +/** Shape of a single skill from the skills.sh search API */ +interface SkillsApiSkill { + id: string; + skillId: string; + name: string; + installs: number; + source: string; +} + function buildSkillQueries(task: string): string[] { const queries = new Set(); const text = task.toLowerCase(); @@ 
-46,22 +57,6 @@ function buildSkillQueries(task: string): string[] { return Array.from(queries); } -function parseSkillLine(line: string): SkillCandidate | null { - const match = line.match(/([a-z0-9_.-]+\/[a-z0-9_.-]+@[a-z0-9_.-]+)/i); - if (!match) return null; - - const fullName = match[1]; - const [repo, skill] = fullName.split('@'); - if (!repo || !skill) return null; - - return { - fullName, - repo, - skill, - score: 0, - }; -} - function scoreCandidate(candidate: SkillCandidate, task: string): number { const text = `${candidate.fullName}`.toLowerCase(); const taskLower = task.toLowerCase(); @@ -82,6 +77,11 @@ function scoreCandidate(candidate: SkillCandidate, task: string): number { boost('tailwind', taskLower.includes('tailwind') ? 2 : 0); boost('seo', taskLower.includes('seo') ? 2 : 0); + // Boost based on install count (popularity as quality signal) + if (candidate.installs > 10000) score += 5; + else if (candidate.installs > 1000) score += 3; + else if (candidate.installs > 100) score += 1; + return score; } @@ -93,29 +93,62 @@ function rankCandidates(candidates: SkillCandidate[], task: string): SkillCandid return candidates.sort((a, b) => b.score - a.score); } +/** + * Search skills.sh HTTP API for skills matching a query. + * Returns structured results with real repo names and install counts. + */ async function findSkillsByQuery(query: string): Promise { try { - const result = await execa('npx', [SKILLS_CLI, 'find', query], { - stdio: 'pipe', - }); - - const lines = result.stdout.split('\n').map((line) => line.trim()); - const candidates: SkillCandidate[] = []; + const url = `${SKILLS_API_URL}?q=${encodeURIComponent(query)}`; + const resp = await fetch(url, { signal: AbortSignal.timeout(5000) }); + if (!resp.ok) return []; + + const data = (await resp.json()) as { skills?: SkillsApiSkill[] }; + return (data.skills || []).map((s) => ({ + fullName: `${s.source}@${s.skillId}`, + repo: s.source, + skill: s.skillId, + installs: s.installs ?? 
0, + score: 0, + })); + } catch { + return []; // Timeout or network error — caller falls back to POPULAR_SKILLS + } +} - for (const line of lines) { - const candidate = parseSkillLine(line); - if (candidate) { - candidates.push(candidate); +/** + * Fallback: match task keywords against the curated POPULAR_SKILLS registry + * when the skills.sh API is unreachable. + */ +function fallbackFromPopularSkills(task: string): SkillCandidate[] { + const taskLower = task.toLowerCase(); + const candidates: SkillCandidate[] = []; + + for (const entry of POPULAR_SKILLS) { + const entryText = `${entry.name} ${entry.description} ${entry.category}`.toLowerCase(); + const matches = + entry.skills.some((s) => taskLower.includes(s.split('-')[0])) || + entryText.includes('frontend') || + entryText.includes('design'); + + if (matches) { + for (const skill of entry.skills) { + candidates.push({ + fullName: `${entry.name}@${skill}`, + repo: entry.name, + skill, + installs: 0, + score: 0, + }); } } - - return candidates; - } catch { - return []; } + + return candidates; } async function installSkill(candidate: SkillCandidate, globalInstall: boolean): Promise { + const { execa } = await import('execa'); const args = [SKILLS_CLI, 'add', candidate.fullName, '-y']; if (globalInstall) args.push('-g'); @@ -127,18 +160,10 @@ async function installSkill(candidate: SkillCandidate, globalInstall: boolean): } } -export async function autoInstallSkillsFromTask( - task: string, - cwd: string, - options?: { forceEnable?: boolean } -): Promise { +export async function autoInstallSkillsFromTask(task: string, cwd: string): Promise { if (!task.trim()) return []; - // Explicit disable always wins + // Explicit disable is the only way to turn this off if (process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return []; - // Enable if: env var set, OR caller opts in (greenfield projects) - const autoInstallEnabled = - process.env.RALPH_ENABLE_SKILL_AUTO_INSTALL === '1' || options?.forceEnable === true; - if 
(!autoInstallEnabled) return []; const queries = buildSkillQueries(task); if (queries.length === 0) return []; @@ -155,6 +180,16 @@ export async function autoInstallSkillsFromTask( } } + // Fallback to curated registry if API returned nothing + if (allCandidates.size === 0) { + const fallback = fallbackFromPopularSkills(task); + for (const candidate of fallback) { + if (!allCandidates.has(candidate.fullName)) { + allCandidates.set(candidate.fullName, candidate); + } + } + } + if (allCandidates.size === 0) { spinner.warn('No skills found from skills.sh'); return []; From aceddb88989893068a804926eff94a0f8c29cc80 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 18:41:47 +0000 Subject: [PATCH 18/46] fix(loop): improve resilience and design quality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loop resilience: - Increase default iterations 7→10, minimum 3→5, buffer 2→3 - Validation failures no longer count as idle (agent is debugging) - Relax stall threshold for larger projects (4 idle for 5+ tasks) Design quality: - Add anti-AI-aesthetic rules to hard preamble (bans purple gradients, Inter/Roboto fonts, glass morphism) - Expand skill auto-apply triggers (page, dashboard, app, shop, store) Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 8 ++++++++ src/loop/executor.ts | 11 ++++++++--- src/loop/skills.ts | 8 +++++++- src/loop/task-counter.ts | 12 ++++++------ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 359391f..71f3f5c 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -187,6 +187,14 @@ Technology gotchas (CRITICAL — follow these exactly): * Do NOT create tailwind.config.js — Tailwind v4 uses CSS-based configuration - JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). 
- Run the dev server or build command to verify the project works before marking setup tasks complete. + +Design quality (IMPORTANT — avoid generic AI aesthetics): +- NEVER use purple-blue gradient backgrounds or gradient text — this is the #1 AI design tell +- NEVER default to Inter, Roboto, or system fonts — pick distinctive typography (e.g. DM Sans, Playfair Display, Space Mono) +- NEVER use glass morphism, neumorphism, or frosted-glass cards +- Choose ONE clear design direction (bold/minimal/retro/editorial/playful) and commit to it +- Use a specific color palette with max 3-4 colors, not rainbow gradients +- Prefer flat or subtle shadows over glassmorphism effects `; // No structured tasks — pass the task with preamble diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 864fb38..04f9a02 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -763,7 +763,10 @@ export async function runLoop(options: LoopOptions): Promise { // Re-parse tasks after agent runs to catch newly completed tasks const postIterationTaskInfo = parsePlanTasks(options.cwd); const tasksProgressedThisIteration = postIterationTaskInfo.completed > previousCompletedTasks; - const hasProductiveProgress = hasChanges || tasksProgressedThisIteration; + // Build/validation failures are NOT idle — agent is actively debugging + const hadValidationFailure = lastValidationFeedback !== null; + const hasProductiveProgress = + hasChanges || tasksProgressedThisIteration || hadValidationFailure; if (!hasProductiveProgress) { consecutiveIdleIterations++; @@ -783,8 +786,10 @@ export async function runLoop(options: LoopOptions): Promise { await waitForFilesystemQuiescence(options.cwd, 2000); } - // Stall detection: stop if no productive progress for 3+ consecutive iterations - if (consecutiveIdleIterations >= 3 && i > 3) { + // Stall detection: stop if no productive progress for consecutive iterations + // More lenient for larger projects (5+ tasks) which need more iterations for scaffolding + 
const staleThreshold = taskInfo.total > 5 ? 4 : 3; + if (consecutiveIdleIterations >= staleThreshold && i > 3) { console.log( chalk.yellow( ` No progress for ${consecutiveIdleIterations} consecutive iterations - stopping` diff --git a/src/loop/skills.ts b/src/loop/skills.ts index c221461..56f3b60 100644 --- a/src/loop/skills.ts +++ b/src/loop/skills.ts @@ -257,7 +257,13 @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean { taskLower.includes('landing') || taskLower.includes('frontend') || taskLower.includes('ui') || - taskLower.includes('ux'); + taskLower.includes('ux') || + taskLower.includes('page') || + taskLower.includes('dashboard') || + taskLower.includes('app') || + taskLower.includes('component') || + taskLower.includes('shop') || + taskLower.includes('store'); const isDesignSkill = text.includes('design') || diff --git a/src/loop/task-counter.ts b/src/loop/task-counter.ts index 0c8d8aa..35a05ad 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -242,14 +242,14 @@ export function calculateOptimalIterations( if (taskCount.total === 0) { const estimate = taskContent ? 
estimateTasksFromContent(taskContent) : null; if (estimate && estimate.estimated > 0) { - const buffer = Math.max(2, Math.ceil(estimate.estimated * 0.3)); + const buffer = Math.max(3, Math.ceil(estimate.estimated * 0.3)); let iterations = estimate.estimated + buffer; - iterations = Math.max(3, iterations); + iterations = Math.max(5, iterations); iterations = Math.min(15, iterations); return { iterations, taskCount, reason: estimate.reason }; } return { - iterations: 7, + iterations: 10, taskCount, reason: 'No plan or spec structure found, using default', }; @@ -264,14 +264,14 @@ export function calculateOptimalIterations( }; } - // Calculate buffer (at least 2, or 30% of pending tasks for retries) - const buffer = Math.max(2, Math.ceil(taskCount.pending * 0.3)); + // Calculate buffer (at least 3, or 30% of pending tasks for retries) + const buffer = Math.max(3, Math.ceil(taskCount.pending * 0.3)); // Calculate iterations: pending tasks + buffer let iterations = taskCount.pending + buffer; // Apply bounds - iterations = Math.max(3, iterations); // Minimum 3 + iterations = Math.max(5, iterations); // Minimum 5 iterations = Math.min(MAX_ESTIMATED_ITERATIONS, iterations); return { From 588dcc5b63b173cff0539bec5760dbbad2f0fcf4 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 18:45:12 +0000 Subject: [PATCH 19/46] fix(skills): skip install when relevant skills exist, filter irrelevant results - Check installed skills first: if frontend-design is already installed and relevant to the task, show "Using installed skills:" and skip API - Add negative keyword filtering: react-native, mobile, ios, android, flutter etc. 
are filtered out for standard web projects - Use detectClaudeSkills() for comprehensive installed-skill detection Co-Authored-By: Claude Opus 4.6 --- src/skills/auto-install.ts | 84 +++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 3c08fff..65a9f57 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -1,7 +1,7 @@ import chalk from 'chalk'; import ora from 'ora'; import { POPULAR_SKILLS } from '../commands/skill.js'; -import { findSkill } from '../loop/skills.js'; +import { type ClaudeSkill, detectClaudeSkills } from '../loop/skills.js'; export interface SkillCandidate { fullName: string; // owner/repo@skill @@ -24,6 +24,17 @@ interface SkillsApiSkill { source: string; } +/** Keywords that indicate a skill is NOT relevant for standard web projects */ +const WEB_NEGATIVE_KEYWORDS = [ + 'react-native', + 'mobile', + 'ios', + 'android', + 'flutter', + 'swift', + 'kotlin', +]; + function buildSkillQueries(task: string): string[] { const queries = new Set(); const text = task.toLowerCase(); @@ -57,6 +68,61 @@ function buildSkillQueries(task: string): string[] { return Array.from(queries); } +/** + * Check if a skill is relevant to the given task. + * Reuses the same logic as the executor's shouldAutoApplySkill. 
+ */ +function isSkillRelevantToTask(skill: ClaudeSkill, task: string): boolean { + const name = skill.name.toLowerCase(); + const desc = (skill.description || '').toLowerCase(); + const text = `${name} ${desc}`; + const taskLower = task.toLowerCase(); + + const taskIsWeb = + taskLower.includes('web') || + taskLower.includes('website') || + taskLower.includes('landing') || + taskLower.includes('frontend') || + taskLower.includes('ui') || + taskLower.includes('ux') || + taskLower.includes('page') || + taskLower.includes('dashboard') || + taskLower.includes('app') || + taskLower.includes('component') || + taskLower.includes('shop') || + taskLower.includes('store'); + + const isDesignSkill = + text.includes('design') || + text.includes('ui') || + text.includes('ux') || + text.includes('frontend'); + + if (taskIsWeb && isDesignSkill) return true; + if (taskLower.includes('astro') && text.includes('astro')) return true; + if (taskLower.includes('tailwind') && text.includes('tailwind')) return true; + if (taskLower.includes('seo') && text.includes('seo')) return true; + + return false; +} + +/** + * Check if a candidate skill is irrelevant to the task (negative filtering). + * E.g., react-native-design for a web landing page. 
+ */ +function isCandidateIrrelevant(candidate: SkillCandidate, task: string): boolean { + const taskLower = task.toLowerCase(); + const skillText = `${candidate.fullName} ${candidate.skill}`.toLowerCase(); + + // If the task explicitly mentions a platform, don't filter it out + for (const keyword of WEB_NEGATIVE_KEYWORDS) { + if (taskLower.includes(keyword)) return false; + } + + // For standard web tasks, filter out mobile/native skills + return WEB_NEGATIVE_KEYWORDS.some((keyword) => skillText.includes(keyword)); +} + function scoreCandidate(candidate: SkillCandidate, task: string): number { const text = `${candidate.fullName}`.toLowerCase(); const taskLower = task.toLowerCase(); @@ -165,6 +231,16 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom // Explicit disable is the only way to turn this off if (process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return []; + // Check if relevant skills are already installed — skip API if so + const installedSkills = detectClaudeSkills(cwd); + const relevantInstalled = installedSkills.filter((s) => isSkillRelevantToTask(s, task)); + + if (relevantInstalled.length > 0) { + const names = relevantInstalled.map((s) => s.name); + console.log(chalk.cyan(`Using installed skills: ${names.join(', ')}`)); + return names; + } + const queries = buildSkillQueries(task); if (queries.length === 0) return []; @@ -197,7 +273,11 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom const ranked = rankCandidates(Array.from(allCandidates.values()), task); const toInstall = ranked - .filter((candidate) => !findSkill(cwd, candidate.skill)) + .filter((candidate) => !isCandidateIrrelevant(candidate, task)) + .filter( + (candidate) => + !installedSkills.some((s) => s.name.toLowerCase() === candidate.skill.toLowerCase()) + ) .slice(0, MAX_SKILLS_TO_INSTALL); if (toInstall.length === 0) { From 4dcf500ab247b28a9492554397b06785e0936e5d Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: 
Fri, 13 Feb 2026 18:50:09 +0000 Subject: [PATCH 20/46] fix(skills): install complementary skills, expand queries, max 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Don't early-return when skills are installed — always search for complementary ones (e.g., react-best-practices alongside frontend-design) - Add React/Vue/Svelte-specific queries (best practices, composition) - Auto-add SEO query for landing/marketing pages - Increase max skills from 2 to 5 - Boost scoring for best-practices, composition, guidelines skills Co-Authored-By: Claude Opus 4.6 --- src/skills/auto-install.ts | 52 +++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 65a9f57..4b1b0c5 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -11,7 +11,7 @@ export interface SkillCandidate { score: number; } -const MAX_SKILLS_TO_INSTALL = 2; +const MAX_SKILLS_TO_INSTALL = 5; const SKILLS_API_URL = 'https://skills.sh/api/search'; const SKILLS_CLI = 'skills'; @@ -39,13 +39,20 @@ function buildSkillQueries(task: string): string[] { const queries = new Set(); const text = task.toLowerCase(); + // Framework-specific skills if (text.includes('astro')) queries.add('astro'); - if (text.includes('react')) queries.add('react'); + if (text.includes('react') || text.includes('jsx')) { + queries.add('react best practices'); + queries.add('react composition patterns'); + } if (text.includes('next')) queries.add('nextjs'); + if (text.includes('vue')) queries.add('vue'); + if (text.includes('svelte')) queries.add('svelte'); if (text.includes('tailwind')) queries.add('tailwind'); if (text.includes('seo')) queries.add('seo'); if (text.includes('accessibility') || text.includes('a11y')) queries.add('accessibility'); + // Landing/marketing pages get SEO + design skills automatically if ( text.includes('landing') || text.includes('website') || @@ 
-54,7 +61,7 @@ function buildSkillQueries(task: string): string[] { text.includes('marketing') ) { queries.add('frontend design'); - queries.add('web design'); + queries.add('seo'); } if (text.includes('design') || text.includes('ui') || text.includes('ux')) { @@ -134,14 +141,17 @@ function scoreCandidate(candidate: SkillCandidate, task: string): number { boost('frontend', 3); boost('design', 3); + boost('best-practices', 2); + boost('composition', 2); + boost('guidelines', 2); boost('ui', 2); boost('ux', 2); boost('landing', 2); boost('astro', taskLower.includes('astro') ? 3 : 1); - boost('react', taskLower.includes('react') ? 2 : 0); - boost('next', taskLower.includes('next') ? 2 : 0); - boost('tailwind', taskLower.includes('tailwind') ? 2 : 0); - boost('seo', taskLower.includes('seo') ? 2 : 0); + boost('react', taskLower.includes('react') ? 3 : 0); + boost('next', taskLower.includes('next') ? 3 : 0); + boost('tailwind', taskLower.includes('tailwind') ? 3 : 0); + boost('seo', taskLower.includes('seo') ? 
3 : 2); // SEO is always useful for web projects // Boost based on install count (popularity as quality signal) if (candidate.installs > 10000) score += 5; @@ -231,20 +241,21 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom // Explicit disable is the only way to turn this off if (process.env.RALPH_DISABLE_SKILL_AUTO_INSTALL === '1') return []; - // Check if relevant skills are already installed — skip API if so + // Detect what's already installed const installedSkills = detectClaudeSkills(cwd); const relevantInstalled = installedSkills.filter((s) => isSkillRelevantToTask(s, task)); + // Show installed skills to the user if (relevantInstalled.length > 0) { const names = relevantInstalled.map((s) => s.name); console.log(chalk.cyan(`Using installed skills: ${names.join(', ')}`)); - return names; } + // Always search for complementary skills (even if some are installed) const queries = buildSkillQueries(task); - if (queries.length === 0) return []; + if (queries.length === 0) return relevantInstalled.map((s) => s.name); - const spinner = ora('Searching skills.sh for relevant skills...').start(); + const spinner = ora('Checking for complementary skills...').start(); const allCandidates = new Map(); for (const query of queries) { @@ -267,8 +278,8 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom } if (allCandidates.size === 0) { - spinner.warn('No skills found from skills.sh'); - return []; + spinner.stop(); + return relevantInstalled.map((s) => s.name); } const ranked = rankCandidates(Array.from(allCandidates.values()), task); @@ -281,14 +292,14 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom .slice(0, MAX_SKILLS_TO_INSTALL); if (toInstall.length === 0) { - spinner.succeed('Relevant skills already installed'); - return []; + spinner.succeed('All relevant skills already installed'); + return relevantInstalled.map((s) => s.name); } spinner.stop(); - 
console.log(chalk.cyan('Installing recommended skills from skills.sh...')); + console.log(chalk.cyan('Installing complementary skills from skills.sh...')); - const installed: string[] = []; + const installed: string[] = relevantInstalled.map((s) => s.name); for (const candidate of toInstall) { console.log(chalk.dim(` • ${candidate.fullName}`)); const ok = await installSkill(candidate, true); @@ -297,10 +308,9 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom } } - if (installed.length > 0) { - console.log(chalk.green(`Installed skills: ${installed.join(', ')}`)); - } else { - console.log(chalk.yellow('No skills were installed.')); + const newlyInstalled = installed.slice(relevantInstalled.length); + if (newlyInstalled.length > 0) { + console.log(chalk.green(`Installed: ${newlyInstalled.join(', ')}`)); } return installed; From 6a4fb62eac4a74052c2b0d731178e8d6e76b166f Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 19:12:36 +0000 Subject: [PATCH 21/46] fix(loop): dynamic iteration budget + ban dev server in loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dynamic iterations: - maxIterations now recalculates when agent expands the plan (e.g., spec has 3 tasks → agent creates 8 → budget adjusts to 11) - Fixes premature "max_iterations" exit on greenfield projects Ban dev server: - Preamble now explicitly says "NEVER start a dev server" and to use npm run build instead. Dev servers block forever, create zombie processes, and eat up ports (5173, 5174, 5175...) across iterations. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 4 ++-- src/loop/executor.ts | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 71f3f5c..a64e9aa 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -175,7 +175,7 @@ Rules: - Don't assume functionality is not already implemented — search the codebase first - Implement completely — no placeholders or stubs - Create files before importing them — never import components or modules that don't exist yet -- After creating or modifying files, verify the project compiles by running the build or dev command +- After creating or modifying files, verify the project compiles by running the build command (e.g., \`npm run build\`). NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) — it blocks forever and wastes resources - When ALL tasks are complete, explicitly state "All tasks completed" - If you learn how to run/build the project, update AGENTS.md @@ -186,7 +186,7 @@ Technology gotchas (CRITICAL — follow these exactly): * CSS file must use: \`@import "tailwindcss";\` (NOT \`@tailwind base/components/utilities\` — those are v3 directives) * Do NOT create tailwind.config.js — Tailwind v4 uses CSS-based configuration - JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). -- Run the dev server or build command to verify the project works before marking setup tasks complete. +- Run \`npm run build\` (NOT \`npm run dev\`) to verify the project compiles before marking setup tasks complete. 
Design quality (IMPORTANT — avoid generic AI aesthetics): - NEVER use purple-blue gradient backgrounds or gradient text — this is the #1 AI design tell diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 04f9a02..31c1420 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -32,7 +32,7 @@ import { RateLimiter } from './rate-limiter.js'; import { analyzeResponse, hasExitSignal } from './semantic-analyzer.js'; import { detectClaudeSkills, formatSkillsForPrompt } from './skills.js'; import { detectStepFromOutput } from './step-detector.js'; -import { getCurrentTask, parsePlanTasks } from './task-counter.js'; +import { getCurrentTask, MAX_ESTIMATED_ITERATIONS, parsePlanTasks } from './task-counter.js'; import { detectBuildCommands, detectValidationCommands, @@ -403,7 +403,7 @@ function summarizeChanges(output: string): string { export async function runLoop(options: LoopOptions): Promise { const spinner = ora(); - const maxIterations = options.maxIterations || 50; + let maxIterations = options.maxIterations || 50; const commits: string[] = []; const startTime = Date.now(); let validationFailures = 0; @@ -507,6 +507,7 @@ export async function runLoop(options: LoopOptions): Promise { // Track completed tasks to show progress diff between iterations let previousCompletedTasks = initialTaskCount.completed; + let previousTotalTasks = initialTaskCount.total; // Filesystem snapshot for git-independent change detection let previousSnapshot = await getFilesystemSnapshot(options.cwd); @@ -625,6 +626,26 @@ export async function runLoop(options: LoopOptions): Promise { } previousCompletedTasks = completedTasks; + // Dynamic iteration budget: if agent expanded the plan (added more tasks), + // recalculate maxIterations so we don't run out mid-project + if (totalTasks > previousTotalTasks && totalTasks > 0) { + const buffer = Math.max(3, Math.ceil(totalTasks * 0.3)); + const newMax = Math.min( + MAX_ESTIMATED_ITERATIONS, + Math.max(maxIterations, totalTasks + 
buffer) + ); + if (newMax > maxIterations) { + console.log( + chalk.dim( + ` Adjusting iterations: ${maxIterations} → ${newMax} (plan expanded to ${totalTasks} tasks)` + ) + ); + maxIterations = newMax; + finalIteration = maxIterations; + } + previousTotalTasks = totalTasks; + } + // Show loop header with task info const sourceIcon = getSourceIcon(options.sourceType); const headerLines: string[] = []; From fcc88f5b188dc61236bea1c0e4b44eff85b86dba Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 21:14:39 +0000 Subject: [PATCH 22/46] feat(fix): add fix command with design-aware skill detection and visual verification - Add `ralph-starter fix` command for autonomous build/design fixing - Fix skill re-installation bug: normalize skill IDs (spaces vs hyphens) for dedup - Fix design skills not applied: add CSS/visual keywords to task detection - Add visual verification instructions for design tasks (web-design-reviewer skill) - Tiered validation: lint on intermediate iterations, build on final iteration - Extend loop by 2 iterations when build fails on final iteration - Fix TOCTOU race condition in task-counter.ts (CodeQL alert #164) - Ban manual dev server in loop preamble (loop handles validation) - Export shared WEB_TASK_KEYWORDS to eliminate keyword list divergence Co-Authored-By: Claude Opus 4.6 --- src/cli.ts | 12 ++ src/commands/fix.ts | 236 ++++++++++++++++++++++++++++++++++++ src/loop/context-builder.ts | 4 +- src/loop/executor.ts | 159 +++++++++++++++--------- src/loop/skills.ts | 59 +++++++-- src/loop/task-counter.ts | 9 +- src/loop/validation.ts | 71 +++++++++++ src/skills/auto-install.ts | 54 ++++++--- 8 files changed, 513 insertions(+), 91 deletions(-) create mode 100644 src/commands/fix.ts diff --git a/src/cli.ts b/src/cli.ts index 8696969..c54d695 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,6 +6,7 @@ import { authCommand } from './commands/auth.js'; import { autoCommand } from './commands/auto.js'; import { checkCommand } from 
'./commands/check.js'; import { configCommand } from './commands/config.js'; +import { fixCommand } from './commands/fix.js'; import { initCommand } from './commands/init.js'; import { integrationsCommand } from './commands/integrations.js'; import { pauseCommand } from './commands/pause.js'; @@ -104,6 +105,17 @@ program .option('--figma-mapping ', 'Custom content mapping file (content mode)') .action(runCommand); +// ralph-starter fix - Fix build errors and code quality issues +program + .command('fix [task]') + .description('Fix build errors and code quality issues (optional: describe what to fix)') + .option('--scan', 'Force full project scan (build + lint + typecheck + tests)') + .option('--agent ', 'Agent to use (default: auto-detect)') + .option('--commit', 'Auto-commit the fix') + .option('--max-iterations ', 'Max fix iterations (default: 3)') + .option('--output-dir ', 'Project directory (default: cwd)') + .action(fixCommand); + // ralph-starter init - Initialize Ralph in a project program .command('init') diff --git a/src/commands/fix.ts b/src/commands/fix.ts new file mode 100644 index 0000000..25baf69 --- /dev/null +++ b/src/commands/fix.ts @@ -0,0 +1,236 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import chalk from 'chalk'; +import ora from 'ora'; +import { type Agent, detectAvailableAgents, detectBestAgent } from '../loop/agents.js'; +import { runLoop } from '../loop/executor.js'; +import { + detectBuildCommands, + detectValidationCommands, + formatValidationFeedback, + runAllValidations, + type ValidationCommand, +} from '../loop/validation.js'; +import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; + +interface FixOptions { + agent?: string; + commit?: boolean; + maxIterations?: string; + outputDir?: string; + scan?: boolean; +} + +/** + * Parse the last validation failure from .ralph/activity.md. + * Returns the names of commands that failed (e.g., ["npm run build"]). 
+ */ +function parseLastFailedValidations(cwd: string): string[] { + const activityPath = join(cwd, '.ralph', 'activity.md'); + if (!existsSync(activityPath)) return []; + + const content = readFileSync(activityPath, 'utf-8'); + // Split into iteration blocks and find the last one with a validation failure + const blocks = content.split(/^### Iteration/m); + const lastFailed = blocks.reverse().find((b) => b.includes('Validation Failed')); + if (!lastFailed) return []; + + const failedNames: string[] = []; + for (const match of lastFailed.matchAll(/- ❌\s+(.+)/g)) { + failedNames.push(match[1].trim()); + } + return failedNames; +} + +export async function fixCommand(customTask: string | undefined, options: FixOptions) { + const cwd = options.outputDir || process.cwd(); + + // --- Step 1: Determine which commands to run --- + let commands: ValidationCommand[] | undefined; + let mode: 'activity' | 'scan' | 'custom' = 'scan'; + let feedback = ''; + + if (customTask) { + // Custom task provided — still run build to check for errors, but don't bail if clean + mode = 'custom'; + commands = detectBuildCommands(cwd); + } else if (!options.scan) { + const failedNames = parseLastFailedValidations(cwd); + if (failedNames.length > 0) { + mode = 'activity'; + const allCommands = detectValidationCommands(cwd); + commands = allCommands.filter((c) => failedNames.some((name) => name.includes(c.name))); + if (commands.length === 0) commands = detectBuildCommands(cwd); + } + } + + if (!commands || commands.length === 0) { + if (mode !== 'custom') mode = 'scan'; + commands = detectValidationCommands(cwd); + if (commands.length === 0) commands = detectBuildCommands(cwd); + } + + // Run validations if we have commands + if (commands.length > 0) { + const spinner = ora( + mode === 'custom' + ? 'Checking project health...' + : `Scanning project (${mode === 'activity' ? 
'from last run' : 'full scan'})...` + ).start(); + + const results = await runAllValidations(cwd, commands); + const failures = results.filter((r) => !r.success); + + if (failures.length === 0 && !customTask) { + spinner.succeed(chalk.green('All checks passed — nothing to fix!')); + return; + } + + if (failures.length > 0) { + spinner.fail(chalk.red(`Found ${failures.length} issue(s):`)); + for (const f of failures) { + const errorText = f.error || f.output || ''; + const errorCount = (errorText.match(/error/gi) || []).length; + console.log(chalk.red(` ✗ ${f.command}${errorCount ? ` (${errorCount} errors)` : ''}`)); + } + feedback = formatValidationFeedback(results); + } else { + spinner.succeed(chalk.green('Build passing')); + } + console.log(); + } else if (!customTask) { + console.log(chalk.yellow('No build/lint/test commands detected in this project.')); + return; + } + + // --- Step 3: Detect agent --- + let agent: Agent | null = null; + + if (options.agent) { + const agents = await detectAvailableAgents(); + const found = agents.find( + (a) => a.type === options.agent || a.name.toLowerCase() === options.agent?.toLowerCase() + ); + if (!found) { + console.log(chalk.red(`Agent not found: ${options.agent}`)); + return; + } + if (!found.available) { + console.log(chalk.red(`Agent not available: ${found.name}`)); + return; + } + agent = found; + } else { + agent = await detectBestAgent(); + } + + if (!agent) { + console.log( + chalk.red( + 'No coding agent detected. Install Claude Code, Cursor, or another supported agent.' + ) + ); + return; + } + + console.log(chalk.cyan(`Using ${agent.name} to fix issues...\n`)); + + // --- Step 4: Build task and run fix loop --- + let fixTask: string; + if (customTask) { + fixTask = feedback + ? `${customTask}\n\nAlso fix any build/validation errors found during the scan.` + : customTask; + } else if (mode === 'activity') { + fixTask = + 'Fix the build/validation errors in this project. 
Study the error output below, identify the root cause, and implement the minimal fix. Do not refactor or make unnecessary changes.'; + } else { + fixTask = + 'Fix all project issues found by the scan below. Prioritize: build errors first, then type errors, then lint violations, then test failures. Make minimal, focused fixes.'; + } + + // For design/visual tasks, add instructions to visually verify with screenshots + const DESIGN_KEYWORDS = [ + 'css', + 'style', + 'styling', + 'padding', + 'margin', + 'spacing', + 'color', + 'colour', + 'background', + 'theme', + 'font', + 'typography', + 'border', + 'shadow', + 'layout', + 'responsive', + 'animation', + 'design', + 'ui', + 'ux', + 'brighter', + 'darker', + 'visual', + ]; + const isDesignTask = + customTask && DESIGN_KEYWORDS.some((kw) => customTask.toLowerCase().includes(kw)); + if (isDesignTask) { + fixTask += `\n\nVisual verification (IMPORTANT): +This is a visual/design task. After making your CSS and styling changes, you MUST visually verify the result: +1. Start a local dev server briefly (exception to the "no dev server" rule for visual checks) +2. Use the /web-design-reviewer skill to take browser screenshots at desktop and mobile viewports +3. Review the screenshots and fix any visual issues you spot (spacing, colors, alignment, contrast) +4. Stop the dev server when done verifying`; + } + + // Install relevant skills so the agent has design/quality context + await autoInstallSkillsFromTask(fixTask, cwd); + + const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : 3; + + const result = await runLoop({ + task: fixTask, + cwd, + agent, + maxIterations: maxIter, + auto: true, + commit: options.commit, + initialValidationFeedback: feedback || undefined, + trackProgress: true, + checkFileCompletion: false, + validate: mode === 'scan', + }); + + // --- Step 5: Verify fix by re-running validations --- + // The loop's exit reason may be max_iterations even if the build now passes. 
+ // For the fix command, success = "do the checks pass now?", not "did the agent say done?" + let fixed = result.success; + + if (!fixed && commands.length > 0) { + const verifySpinner = ora('Verifying fix...').start(); + const verifyResults = await runAllValidations(cwd, commands); + const stillFailing = verifyResults.filter((r) => !r.success); + + if (stillFailing.length === 0) { + verifySpinner.succeed(chalk.green('All checks passing now!')); + fixed = true; + } else { + verifySpinner.fail(chalk.red(`${stillFailing.length} issue(s) still failing`)); + for (const f of stillFailing) { + console.log(chalk.red(` ✗ ${f.command}`)); + } + } + } + + // --- Step 6: Report --- + console.log(); + if (fixed) { + console.log(chalk.green('All issues fixed!')); + } else { + console.log(chalk.red('Could not fix all issues automatically.')); + console.log(chalk.dim(' Run again or fix remaining issues manually.')); + } +} diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index a64e9aa..e9b4861 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -175,7 +175,7 @@ Rules: - Don't assume functionality is not already implemented — search the codebase first - Implement completely — no placeholders or stubs - Create files before importing them — never import components or modules that don't exist yet -- After creating or modifying files, verify the project compiles by running the build command (e.g., \`npm run build\`). NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) — it blocks forever and wastes resources +- Do NOT run build or dev server commands yourself — the loop automatically runs lint checks between iterations and a full build on the final iteration. NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) 
— it blocks forever and wastes resources - When ALL tasks are complete, explicitly state "All tasks completed" - If you learn how to run/build the project, update AGENTS.md @@ -186,7 +186,7 @@ Technology gotchas (CRITICAL — follow these exactly): * CSS file must use: \`@import "tailwindcss";\` (NOT \`@tailwind base/components/utilities\` — those are v3 directives) * Do NOT create tailwind.config.js — Tailwind v4 uses CSS-based configuration - JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). -- Run \`npm run build\` (NOT \`npm run dev\`) to verify the project compiles before marking setup tasks complete. +- Do NOT run \`npm run build\` or \`npm run dev\` manually — the loop handles validation automatically (lint between tasks, full build at the end). Design quality (IMPORTANT — avoid generic AI aesthetics): - NEVER use purple-blue gradient backgrounds or gradient text — this is the #1 AI design tell diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 31c1420..d38fd8a 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -35,10 +35,12 @@ import { detectStepFromOutput } from './step-detector.js'; import { getCurrentTask, MAX_ESTIMATED_ITERATIONS, parsePlanTasks } from './task-counter.js'; import { detectBuildCommands, + detectLintCommands, detectValidationCommands, formatValidationFeedback, runAllValidations, runBuildValidation, + runLintValidation, type ValidationResult, } from './validation.js'; @@ -233,6 +235,7 @@ export interface LoopOptions { validationWarmup?: number; // Skip validation until N tasks completed (for greenfield builds) maxCost?: number; // Maximum cost in USD before stopping (0 = unlimited) agentTimeout?: number; // Agent timeout in milliseconds (default: 300000 = 5 min) + initialValidationFeedback?: string; // Pre-populate with errors (used by `fix` command) } export interface LoopResult { @@ -439,6 +442,8 @@ export async function 
runLoop(options: LoopOptions): Promise { // Always-on build validation (not gated by --validate flag) // Re-detected inside the loop for greenfield projects where package.json appears mid-loop let buildCommands = detectBuildCommands(options.cwd); + // Lightweight lint for intermediate iterations (build only runs on final iteration) + let lintCommands = detectLintCommands(options.cwd); // Detect Claude Code skills const detectedSkills = detectClaudeSkills(options.cwd); @@ -449,7 +454,8 @@ export async function runLoop(options: LoopOptions): Promise { } // Track validation feedback separately — don't mutate taskWithSkills - let lastValidationFeedback = ''; + // initialValidationFeedback lets the `fix` command pre-populate errors for iteration 1 + let lastValidationFeedback = options.initialValidationFeedback || ''; // Completion detection options const completionOptions: CompletionOptions = { @@ -509,6 +515,11 @@ export async function runLoop(options: LoopOptions): Promise { let previousCompletedTasks = initialTaskCount.completed; let previousTotalTasks = initialTaskCount.total; + // Track whether we've already extended the loop for build-fix retries + // When the build fails on the "final" iteration, we grant 2 extra iterations to fix it (once) + let buildFixExtended = false; + const BUILD_FIX_EXTRA_ITERATIONS = 2; + // Filesystem snapshot for git-independent change detection let previousSnapshot = await getFilesystemSnapshot(options.cwd); @@ -556,7 +567,8 @@ export async function runLoop(options: LoopOptions): Promise { } // Check for file-based completion signals - if (options.checkFileCompletion) { + // Skip if validation just failed — the agent needs a chance to fix build errors first + if (options.checkFileCompletion && !lastValidationFeedback) { const fileCompletion = await checkFileBasedCompletion(options.cwd); if (fileCompletion.completed) { spinner.succeed(chalk.green(`File-based completion: ${fileCompletion.reason}`)); @@ -917,8 +929,8 @@ export async 
function runLoop(options: LoopOptions): Promise { }; } - // --- Always-on build validation --- - // Re-detect build commands if none found yet (greenfield: package.json may appear mid-loop) + // --- Tiered validation: lint on intermediate iterations, build on final --- + // Re-detect commands if none found yet (greenfield: package.json may appear mid-loop) if (buildCommands.length === 0) { buildCommands = detectBuildCommands(options.cwd); if (buildCommands.length > 0 && process.env.RALPH_DEBUG) { @@ -927,72 +939,111 @@ export async function runLoop(options: LoopOptions): Promise { ); } } + if (lintCommands.length === 0) { + lintCommands = detectLintCommands(options.cwd); + } - // Run build validation if commands available and not already covered by full validation const buildCoveredByFullValidation = options.validate && validationCommands.some((vc) => vc.name === 'build' || vc.name === 'typecheck'); - if (buildCommands.length > 0 && !buildCoveredByFullValidation && i > 1) { - spinner.start(chalk.yellow(`Loop ${i}: Running build check...`)); - - const buildResults: ValidationResult[] = []; - for (const cmd of buildCommands) { - buildResults.push(await runBuildValidation(options.cwd, cmd)); + // Determine if this is a "final" iteration where the full build should run: + // - Last allowed iteration, OR all plan tasks are complete + const preValidationTaskInfo = parsePlanTasks(options.cwd); + const isFinalIteration = i === maxIterations || preValidationTaskInfo.pending === 0; + + if (!buildCoveredByFullValidation && i > 1) { + const checkResults: ValidationResult[] = []; + let checkLabel = ''; + + if (isFinalIteration && buildCommands.length > 0) { + // Final iteration: run full build validation (catches compile errors) + checkLabel = 'build'; + spinner.start(chalk.yellow(`Loop ${i}: Running build check...`)); + for (const cmd of buildCommands) { + checkResults.push(await runBuildValidation(options.cwd, cmd)); + } + } else if (!isFinalIteration && lintCommands.length > 
0) { + // Intermediate iteration: run lightweight lint check (fast feedback) + checkLabel = 'lint'; + spinner.start(chalk.yellow(`Loop ${i}: Running lint check...`)); + for (const cmd of lintCommands) { + checkResults.push(await runLintValidation(options.cwd, cmd)); + } } - const allBuildsPassed = buildResults.every((r) => r.success); - if (!allBuildsPassed) { - validationFailures++; - const feedback = formatValidationFeedback(buildResults); - spinner.fail(chalk.red(`Loop ${i}: Build check failed`)); - - const failedSummaries: string[] = []; - for (const vr of buildResults) { - if (!vr.success) { - const errorText = vr.error || vr.output || ''; - const errorCount = (errorText.match(/error/gi) || []).length; - const hint = errorCount > 0 ? `${errorCount} errors` : 'failed'; - failedSummaries.push(`${vr.command} (${hint})`); + if (checkResults.length > 0) { + const allPassed = checkResults.every((r) => r.success); + + if (!allPassed) { + validationFailures++; + const feedback = formatValidationFeedback(checkResults); + spinner.fail( + chalk.red(`Loop ${i}: ${checkLabel === 'build' ? 'Build' : 'Lint'} check failed`) + ); + + const failedSummaries: string[] = []; + for (const vr of checkResults) { + if (!vr.success) { + const errorText = vr.error || vr.output || ''; + const errorCount = (errorText.match(/error/gi) || []).length; + const hint = errorCount > 0 ? 
`${errorCount} errors` : 'failed'; + failedSummaries.push(`${vr.command} (${hint})`); + } + } + console.log(chalk.red(` ✗ ${failedSummaries.join(' │ ')}`)); + + const errorMsg = checkResults + .filter((r) => !r.success) + .map((r) => r.error?.slice(0, 200) || r.output?.slice(0, 200) || r.command) + .join('\n'); + const tripped = circuitBreaker.recordFailure(errorMsg); + + if (tripped) { + const reason = circuitBreaker.getTripReason(); + console.log(chalk.red(`Circuit breaker tripped: ${reason}`)); + if (progressTracker && progressEntry) { + progressEntry.status = 'failed'; + progressEntry.summary = `Circuit breaker tripped (${checkLabel}): ${reason}`; + progressEntry.validationResults = checkResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + finalIteration = i; + exitReason = 'circuit_breaker'; + break; } - } - console.log(chalk.red(` ✗ ${failedSummaries.join(' │ ')}`)); - - const errorMsg = buildResults - .filter((r) => !r.success) - .map((r) => r.error?.slice(0, 200) || r.output?.slice(0, 200) || r.command) - .join('\n'); - const tripped = circuitBreaker.recordFailure(errorMsg); - if (tripped) { - const reason = circuitBreaker.getTripReason(); - console.log(chalk.red(`Circuit breaker tripped: ${reason}`)); if (progressTracker && progressEntry) { - progressEntry.status = 'failed'; - progressEntry.summary = `Circuit breaker tripped (build): ${reason}`; - progressEntry.validationResults = buildResults; + progressEntry.status = 'validation_failed'; + progressEntry.summary = `${checkLabel === 'build' ? 
'Build' : 'Lint'} check failed`; + progressEntry.validationResults = checkResults; progressEntry.duration = Date.now() - iterationStart; await progressTracker.appendEntry(progressEntry); } - finalIteration = i; - exitReason = 'circuit_breaker'; - break; - } - if (progressTracker && progressEntry) { - progressEntry.status = 'validation_failed'; - progressEntry.summary = 'Build check failed'; - progressEntry.validationResults = buildResults; - progressEntry.duration = Date.now() - iterationStart; - await progressTracker.appendEntry(progressEntry); - } + // If build failed on the final iteration, extend the loop to let the agent fix it + if (checkLabel === 'build' && isFinalIteration && !buildFixExtended) { + const newMax = maxIterations + BUILD_FIX_EXTRA_ITERATIONS; + console.log( + chalk.yellow( + ` Extending loop by ${BUILD_FIX_EXTRA_ITERATIONS} iterations to fix build errors (${maxIterations} → ${newMax})` + ) + ); + maxIterations = newMax; + finalIteration = maxIterations; + buildFixExtended = true; + } - // Pass build feedback to context builder for next iteration - // (don't mutate taskWithSkills — that defeats context trimming) - lastValidationFeedback = feedback; - continue; // Go to next iteration to fix build issues + lastValidationFeedback = feedback; + continue; + } + spinner.succeed( + chalk.green(`Loop ${i}: ${checkLabel === 'build' ? 
'Build' : 'Lint'} check passed`) + ); + circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; } - spinner.succeed(chalk.green(`Loop ${i}: Build check passed`)); } // Run full validation (backpressure) if enabled and there are changes diff --git a/src/loop/skills.ts b/src/loop/skills.ts index 56f3b60..4662907 100644 --- a/src/loop/skills.ts +++ b/src/loop/skills.ts @@ -9,6 +9,51 @@ export interface ClaudeSkill { source: 'global' | 'project' | 'agents' | 'skills.sh'; } +/** Keywords that indicate the task involves web/UI/design work */ +export const WEB_TASK_KEYWORDS = [ + // Page/app types + 'web', + 'website', + 'landing', + 'frontend', + 'page', + 'dashboard', + 'app', + 'component', + 'shop', + 'store', + // UI/UX terms + 'ui', + 'ux', + // CSS/visual properties (detect styling tasks) + 'css', + 'style', + 'styling', + 'layout', + 'padding', + 'margin', + 'spacing', + 'color', + 'colour', + 'background', + 'theme', + 'dark mode', + 'font', + 'typography', + 'border', + 'shadow', + 'radius', + 'responsive', + 'breakpoint', + 'animation', + 'transition', + 'grid', + 'flex', + 'flexbox', + 'align', + 'position', +]; + /** * Parse YAML frontmatter from markdown content * Returns name and description if found @@ -251,19 +296,7 @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean { const text = `${name} ${desc}`; const taskLower = task.toLowerCase(); - const taskIsWeb = - taskLower.includes('web') || - taskLower.includes('website') || - taskLower.includes('landing') || - taskLower.includes('frontend') || - taskLower.includes('ui') || - taskLower.includes('ux') || - taskLower.includes('page') || - taskLower.includes('dashboard') || - taskLower.includes('app') || - taskLower.includes('component') || - taskLower.includes('shop') || - taskLower.includes('store'); + const taskIsWeb = WEB_TASK_KEYWORDS.some((kw) => taskLower.includes(kw)); const isDesignSkill = text.includes('design') || diff --git a/src/loop/task-counter.ts 
b/src/loop/task-counter.ts index 35a05ad..29898e5 100644 --- a/src/loop/task-counter.ts +++ b/src/loop/task-counter.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync, statSync } from 'node:fs'; +import { readFileSync, statSync } from 'node:fs'; import { join } from 'node:path'; /** Maximum iterations for estimated calculations */ @@ -42,11 +42,6 @@ export interface TaskCount { export function parsePlanTasks(cwd: string): TaskCount { const planPath = join(cwd, 'IMPLEMENTATION_PLAN.md'); - if (!existsSync(planPath)) { - _planCache = null; - return { total: 0, completed: 0, pending: 0, tasks: [] }; - } - // Return cached result if file hasn't changed (avoids redundant reads within same iteration) let preMtime = 0; try { @@ -55,7 +50,7 @@ export function parsePlanTasks(cwd: string): TaskCount { return cloneTaskCount(_planCache.result); } } catch { - // stat failed — fall through to full parse + // stat failed (file may not exist) — fall through to read attempt } try { diff --git a/src/loop/validation.ts b/src/loop/validation.ts index fe6fd50..1738294 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -96,6 +96,77 @@ export function detectValidationCommands(cwd: string): ValidationCommand[] { return commands; } +/** + * Detect lint-only commands for lightweight intermediate-iteration checks. + * Much faster than build (5-15s vs 30-60s), good for catching syntax errors mid-loop. + * Returns empty array if no lint command is available — caller should skip validation. 
+ */ +export function detectLintCommands(cwd: string): ValidationCommand[] { + const commands: ValidationCommand[] = []; + + // Check AGENTS.md for lint command + const agentsPath = join(cwd, 'AGENTS.md'); + if (existsSync(agentsPath)) { + const content = readFileSync(agentsPath, 'utf-8'); + const lintMatch = content.match(/[-*]\s*\*?\*?lint\*?\*?[:\s]+`([^`]+)`/i); + if (lintMatch) { + const parts = lintMatch[1].trim().split(/\s+/); + commands.push({ name: 'lint', command: parts[0], args: parts.slice(1) }); + } + } + + // Fallback to package.json + if (commands.length === 0) { + const packagePath = join(cwd, 'package.json'); + if (existsSync(packagePath)) { + try { + const pkg = JSON.parse(readFileSync(packagePath, 'utf-8')); + const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); + + if (scripts.lint) { + const cmd = getRunCommand(pm, 'lint'); + commands.push({ name: 'lint', ...cmd }); + } + } catch { + // Invalid package.json + } + } + } + + return commands; +} + +/** + * Run a lint validation command with a short timeout (lint is fast). + */ +export async function runLintValidation( + cwd: string, + command: ValidationCommand +): Promise { + try { + const result = await execa(command.command, command.args, { + cwd, + timeout: 60000, // 1 minute timeout (lint is fast) + reject: false, + }); + + return { + success: result.exitCode === 0, + command: `${command.command} ${command.args.join(' ')}`, + output: result.stdout, + ...(result.exitCode !== 0 && { error: result.stderr || result.stdout }), + }; + } catch (error) { + return { + success: false, + command: `${command.command} ${command.args.join(' ')}`, + output: '', + error: error instanceof Error ? error.message : 'Unknown error', + }; + } +} + /** * Detect build-only commands for always-on build validation. 
* Unlike detectValidationCommands(), this: diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 4b1b0c5..764de99 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -1,7 +1,7 @@ import chalk from 'chalk'; import ora from 'ora'; import { POPULAR_SKILLS } from '../commands/skill.js'; -import { type ClaudeSkill, detectClaudeSkills } from '../loop/skills.js'; +import { type ClaudeSkill, detectClaudeSkills, WEB_TASK_KEYWORDS } from '../loop/skills.js'; export interface SkillCandidate { fullName: string; // owner/repo@skill @@ -12,6 +12,15 @@ export interface SkillCandidate { } const MAX_SKILLS_TO_INSTALL = 5; + +/** + * Normalize a skill identifier for comparison. + * Handles mismatches between YAML frontmatter names (may use spaces/caps) + * and skills.sh API skillIds (always hyphenated lowercase). + */ +function normalizeSkillId(name: string): string { + return name.toLowerCase().replace(/[\s_]+/g, '-'); +} const SKILLS_API_URL = 'https://skills.sh/api/search'; const SKILLS_CLI = 'skills'; @@ -68,6 +77,33 @@ function buildSkillQueries(task: string): string[] { queries.add('ui design'); } + // CSS/styling tasks get design skills + const cssKeywords = [ + 'css', + 'style', + 'styling', + 'padding', + 'margin', + 'spacing', + 'color', + 'colour', + 'background', + 'theme', + 'font', + 'typography', + 'border', + 'shadow', + 'layout', + 'responsive', + 'animation', + 'grid', + 'flex', + ]; + if (cssKeywords.some((kw) => text.includes(kw))) { + queries.add('frontend design'); + queries.add('ui design'); + } + if (queries.size === 0) { queries.add('web design'); } @@ -85,19 +121,7 @@ function isSkillRelevantToTask(skill: ClaudeSkill, task: string): boolean { const text = `${name} ${desc}`; const taskLower = task.toLowerCase(); - const taskIsWeb = - taskLower.includes('web') || - taskLower.includes('website') || - taskLower.includes('landing') || - taskLower.includes('frontend') || - taskLower.includes('ui') || - 
taskLower.includes('ux') || - taskLower.includes('page') || - taskLower.includes('dashboard') || - taskLower.includes('app') || - taskLower.includes('component') || - taskLower.includes('shop') || - taskLower.includes('store'); + const taskIsWeb = WEB_TASK_KEYWORDS.some((kw) => taskLower.includes(kw)); const isDesignSkill = text.includes('design') || @@ -287,7 +311,7 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom .filter((candidate) => !isCandidateIrrelevant(candidate, task)) .filter( (candidate) => - !installedSkills.some((s) => s.name.toLowerCase() === candidate.skill.toLowerCase()) + !installedSkills.some((s) => normalizeSkillId(s.name) === normalizeSkillId(candidate.skill)) ) .slice(0, MAX_SKILLS_TO_INSTALL); From f3d3d5a872200b6e654bddfc641dfa4b3e21439c Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 21:18:07 +0000 Subject: [PATCH 23/46] docs: add fix command to README, CLI docs, and llms.txt Co-Authored-By: Claude Opus 4.6 --- README.md | 26 +++++++++ docs/docs/cli/fix.md | 115 ++++++++++++++++++++++++++++++++++++++ docs/sidebars.ts | 1 + docs/static/llms-full.txt | 94 +++++++++++++++++++++++++++++++ docs/static/llms.txt | 1 + 5 files changed, 237 insertions(+) create mode 100644 docs/docs/cli/fix.md diff --git a/README.md b/README.md index fa19b73..3852a9c 100644 --- a/README.md +++ b/README.md @@ -451,6 +451,7 @@ This creates: |---------|-------------| | `ralph-starter` | Launch interactive wizard | | `ralph-starter run [task]` | Run an autonomous coding loop | +| `ralph-starter fix [task]` | Fix build errors, lint issues, or design problems | | `ralph-starter auto` | Batch-process issues from GitHub/Linear | | `ralph-starter integrations ` | Manage integrations (list, help, test, fetch) | | `ralph-starter plan` | Create implementation plan from specs | @@ -561,6 +562,31 @@ ralph-starter run --circuit-breaker-failures 2 "build Y" | `--output-dir ` | Directory to run task in (skips prompt) | | `--prd ` 
| Read tasks from markdown | +## Options for `fix` + +| Flag | Description | +|------|-------------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | +| `--agent <name>` | Specify agent to use (default: auto-detect) | +| `--commit` | Auto-commit the fix | +| `--max-iterations <n>` | Max fix iterations (default: 3) | +| `--output-dir <path>` | Project directory (default: cwd) | + +```bash +# Fix build/lint errors automatically +ralph-starter fix + +# Fix a specific design/visual issue +ralph-starter fix "fix the paddings and make the colors brighter" + +# Full scan with auto-commit +ralph-starter fix --scan --commit +``` + +For design-related tasks (CSS, colors, spacing, etc.), the fix command automatically: +- Detects and applies installed design skills +- Instructs the agent to visually verify changes via browser screenshots + ## Config Commands ```bash diff --git a/docs/docs/cli/fix.md b/docs/docs/cli/fix.md new file mode 100644 index 0000000..aa84586 --- /dev/null +++ b/docs/docs/cli/fix.md @@ -0,0 +1,115 @@ +--- +sidebar_position: 2 +title: fix +description: Fix build errors, lint issues, or design problems +keywords: [cli, fix, command, build errors, lint, design] +--- + +# ralph-starter fix + +Fix build errors, lint issues, or design problems. + +## Synopsis + +```bash +ralph-starter fix [task] [options] +``` + +## Description + +The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. + +When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: + +- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) 
+- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots + +## Arguments + +| Argument | Description | +|----------|-------------| +| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. | + +## Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--agent <name>` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | +| `--commit` | Auto-commit the fix | false | +| `--max-iterations <n>` | Maximum fix iterations | 3 | +| `--output-dir <path>` | Project directory | cwd | + +## Examples + +### Fix Build Errors + +```bash +# Auto-detect and fix build/lint errors +ralph-starter fix + +# Force full project scan +ralph-starter fix --scan +``` + +### Fix Design Issues + +```bash +# Fix visual/CSS problems +ralph-starter fix "fix the paddings and make the colors brighter" + +# Fix responsive layout +ralph-starter fix "make the layout responsive on mobile" + +# Fix color theme +ralph-starter fix "change the color scheme to darker tones" +``` + +### With Options + +```bash +# Auto-commit the fix +ralph-starter fix --scan --commit + +# Use a specific agent +ralph-starter fix "fix lint errors" --agent claude-code + +# Allow more iterations for complex fixes +ralph-starter fix "fix all test failures" --max-iterations 5 +``` + +## Behavior + +1. **Error Detection**: + - If `task` provided → runs build check for baseline, then fixes the described issue + - If no task and previous failures exist → re-runs failed validations from `.ralph/activity.md` + - If `--scan` → runs full validation suite (build + lint + typecheck + tests) + +2. 
**Skill Detection**: + - Detects installed Claude Code skills relevant to the task + - For CSS/design tasks → auto-applies design skills and adds visual verification instructions + - Searches skills.sh for complementary skills if needed + +3. 
**Fix Loop**: + - Agent works on fixing issues (default: 3 iterations) + - Lint checks run between iterations (fast feedback) + - Full build check runs on final iteration + - If build fails on final iteration → extends loop by 2 extra iterations + +4. **Verification**: + - Re-runs original validation commands after the loop + - Reports success only if all checks pass (not just agent completion) + +## Exit Codes + +| Code | Description | +|------|-------------| +| 0 | All issues fixed | +| 1 | Could not fix all issues automatically | + +## See Also + +- [ralph-starter run](/docs/cli/run) +- [ralph-starter skill](/docs/cli/skill) +- [Validation](/docs/advanced/validation) +- [Skills System](/docs/guides/skills-system) diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 93d1de1..2f1b4f3 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -40,6 +40,7 @@ const sidebars: SidebarsConfig = { collapsed: true, items: [ 'cli/run', + 'cli/fix', 'cli/init', 'cli/plan', 'cli/config', diff --git a/docs/static/llms-full.txt b/docs/static/llms-full.txt index 53e7f6b..d74ecd2 100644 --- a/docs/static/llms-full.txt +++ b/docs/static/llms-full.txt @@ -1523,6 +1523,100 @@ Checking ralph-starter configuration... --- +## fix + +# ralph-starter fix + +Fix build errors, lint issues, or design problems. + +## Synopsis + +```bash +ralph-starter fix [task] [options] +``` + +## Description + +The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. + +When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: + +- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) 
+- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots + +## Arguments + +| Argument | Description | +|----------|-------------| +| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. | + +## Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--agent <name>` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | +| `--commit` | Auto-commit the fix | false | +| `--max-iterations <n>` | Maximum fix iterations | 3 | +| `--output-dir <path>` | Project directory | cwd | + +## Examples + +### Fix Build Errors + +```bash +# Auto-detect and fix build/lint errors +ralph-starter fix + +# Force full project scan +ralph-starter fix --scan +``` + +### Fix Design Issues + +```bash +# Fix visual/CSS problems +ralph-starter fix "fix the paddings and make the colors brighter" + +# Fix responsive layout +ralph-starter fix "make the layout responsive on mobile" +``` + +### With Options + +```bash +# Auto-commit the fix +ralph-starter fix --scan --commit + +# Use a specific agent +ralph-starter fix "fix lint errors" --agent claude-code + +# Allow more iterations for complex fixes +ralph-starter fix "fix all test failures" --max-iterations 5 +``` + +## Behavior + +1. **Error Detection**: Scans for build/lint/test failures or uses the custom task description. +2. **Skill Detection**: Auto-applies installed design skills for CSS/visual tasks. Adds visual verification instructions. +3. **Fix Loop**: Agent works on fixing issues (default: 3 iterations). Lint checks run between iterations; full build on final iteration. +4. **Verification**: Re-runs validation commands after the loop. Reports success only if all checks pass. 
+ +## Exit Codes + +| Code | Description | +|------|-------------| +| 0 | All issues fixed | +| 1 | Could not fix all issues automatically | + +## See Also + +- [ralph-starter run](/docs/cli/run) +- [ralph-starter skill](/docs/cli/skill) +- [Validation](/docs/advanced/validation) + +--- + ## config # ralph-starter config diff --git a/docs/static/llms.txt b/docs/static/llms.txt index 1ce9198..1f5973c 100644 --- a/docs/static/llms.txt +++ b/docs/static/llms.txt @@ -14,6 +14,7 @@ This file contains links to documentation sections following the llmstxt.org sta - [auth](https://ralphstarter.ai/docs/cli/auth): Browser-based OAuth authentication for integrations - [auto](https://ralphstarter.ai/docs/cli/auto): Autonomous batch task processing from GitHub and Linear - [check](https://ralphstarter.ai/docs/cli/check): Validate configuration and test LLM connection +- [fix](https://ralphstarter.ai/docs/cli/fix): Fix build errors, lint issues, or design problems - [config](https://ralphstarter.ai/docs/cli/config): Manage source configuration and credentials - [init](https://ralphstarter.ai/docs/cli/init): Initialize Ralph Playbook in a project - [integrations](https://ralphstarter.ai/docs/cli/integrations): Manage, test, and fetch data from integrations From 65e77729ff28a560e81a762f826addf88165c665 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 21:20:08 +0000 Subject: [PATCH 24/46] docs: regenerate SEO/AEO artifacts from docusaurus build Rebuild docs and sync auto-generated files (llms.txt, llms-full.txt, docs.json, ai-index.json, sidebar.json, sitemap.xml, docs-urls.txt) to include the new fix command documentation. 
Co-Authored-By: Claude Opus 4.6 --- docs/static/ai-index.json | 22 +++- docs/static/docs-urls.txt | 1 + docs/static/docs.json | 20 +++- docs/static/llms-full.txt | 223 ++++++++++++++++++++++---------------- docs/static/llms.txt | 2 +- docs/static/sidebar.json | 16 ++- docs/static/sitemap.xml | 2 +- 7 files changed, 181 insertions(+), 105 deletions(-) diff --git a/docs/static/ai-index.json b/docs/static/ai-index.json index af31353..78648b4 100644 --- a/docs/static/ai-index.json +++ b/docs/static/ai-index.json @@ -5,7 +5,7 @@ "description": "Connect your tools like GitHub, Linear, and Notion. Fetch specs from anywhere and let AI coding agents build production-ready code automatically with autonomous loops.", "url": "https://ralphstarter.ai", "version": "1.0.0", - "generatedAt": "2026-02-09T23:59:04.120Z", + "generatedAt": "2026-02-13T21:19:43.300Z", "summary": { "purpose": "ralph-starter is an AI-powered CLI tool that fetches specs from tools like Figma, GitHub, Linear, and Notion, then runs autonomous AI coding loops to build production-ready code.", "primaryUseCase": "Developers use ralph-starter to automate coding tasks by providing natural language specs or importing requirements from external tools.", @@ -144,7 +144,7 @@ }, { "name": "Cli", - "documentCount": 13, + "documentCount": 14, "documents": [ { "title": "auth", @@ -206,6 +206,20 @@ "credentials" ] }, + { + "title": "fix", + "description": "Fix build errors, lint issues, or design problems", + "url": "https://ralphstarter.ai/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "keywords": [ + "cli", + "fix", + "command", + "build errors", + "lint", + "design" + ] + }, { "title": "init", "description": "Initialize Ralph Playbook in a project", @@ -639,8 +653,8 @@ "sitemap": "https://ralphstarter.ai/sitemap.xml" }, "stats": { - "totalDocuments": 39, + "totalDocuments": 40, "totalCategories": 8, - "lastUpdated": "2026-02-09T23:59:04.120Z" + "lastUpdated": "2026-02-13T21:19:43.300Z" } 
} \ No newline at end of file diff --git a/docs/static/docs-urls.txt b/docs/static/docs-urls.txt index f5d6207..58be194 100644 --- a/docs/static/docs-urls.txt +++ b/docs/static/docs-urls.txt @@ -7,6 +7,7 @@ https://ralphstarter.ai/docs/cli/auth.md https://ralphstarter.ai/docs/cli/auto.md https://ralphstarter.ai/docs/cli/check.md https://ralphstarter.ai/docs/cli/config.md +https://ralphstarter.ai/docs/cli/fix.md https://ralphstarter.ai/docs/cli/init.md https://ralphstarter.ai/docs/cli/integrations.md https://ralphstarter.ai/docs/cli/plan.md diff --git a/docs/static/docs.json b/docs/static/docs.json index 0b86d4b..c881da0 100644 --- a/docs/static/docs.json +++ b/docs/static/docs.json @@ -2,8 +2,8 @@ "name": "ralph-starter - AI-Powered Autonomous Coding from Specs to Production", "description": "Connect your tools like GitHub, Linear, and Notion. Fetch specs from anywhere and let AI coding agents build production-ready code automatically with autonomous loops.", "baseUrl": "https://ralphstarter.ai", - "generatedAt": "2026-02-09T23:59:04.119Z", - "totalDocs": 39, + "generatedAt": "2026-02-13T21:19:43.299Z", + "totalDocs": 40, "access": { "llmsTxt": "https://ralphstarter.ai/llms.txt", "llmsFullTxt": "https://ralphstarter.ai/llms-full.txt", @@ -155,6 +155,22 @@ "credentials" ] }, + { + "title": "fix", + "description": "Fix build errors, lint issues, or design problems", + "path": "/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "htmlUrl": "https://ralphstarter.ai/docs/cli/fix", + "category": "Cli", + "keywords": [ + "cli", + "fix", + "command", + "build errors", + "lint", + "design" + ] + }, { "title": "init", "description": "Initialize Ralph Playbook in a project", diff --git a/docs/static/llms-full.txt b/docs/static/llms-full.txt index d74ecd2..c5181f4 100644 --- a/docs/static/llms-full.txt +++ b/docs/static/llms-full.txt @@ -1523,100 +1523,6 @@ Checking ralph-starter configuration... 
--- -## fix - -# ralph-starter fix - -Fix build errors, lint issues, or design problems. - -## Synopsis - -```bash -ralph-starter fix [task] [options] -``` - -## Description - -The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. - -When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: - -- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) -- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots - -## Arguments - -| Argument | Description | -|----------|-------------| -| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. | - -## Options - -| Option | Description | Default | -|--------|-------------|---------| -| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | -| `--agent ` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | -| `--commit` | Auto-commit the fix | false | -| `--max-iterations ` | Maximum fix iterations | 3 | -| `--output-dir ` | Project directory | cwd | - -## Examples - -### Fix Build Errors - -```bash -# Auto-detect and fix build/lint errors -ralph-starter fix - -# Force full project scan -ralph-starter fix --scan -``` - -### Fix Design Issues - -```bash -# Fix visual/CSS problems -ralph-starter fix "fix the paddings and make the colors brighter" - -# Fix responsive layout -ralph-starter fix "make the layout responsive on mobile" -``` - -### With Options - -```bash -# Auto-commit the fix -ralph-starter fix --scan --commit - -# Use a specific agent -ralph-starter fix "fix lint errors" --agent claude-code - -# Allow more iterations for complex fixes -ralph-starter fix "fix all test failures" --max-iterations 5 -``` - -## Behavior - -1. 
**Error Detection**: Scans for build/lint/test failures or uses the custom task description. -2. **Skill Detection**: Auto-applies installed design skills for CSS/visual tasks. Adds visual verification instructions. -3. **Fix Loop**: Agent works on fixing issues (default: 3 iterations). Lint checks run between iterations; full build on final iteration. -4. **Verification**: Re-runs validation commands after the loop. Reports success only if all checks pass. - -## Exit Codes - -| Code | Description | -|------|-------------| -| 0 | All issues fixed | -| 1 | Could not fix all issues automatically | - -## See Also - -- [ralph-starter run](/docs/cli/run) -- [ralph-starter skill](/docs/cli/skill) -- [Validation](/docs/advanced/validation) - ---- - ## config # ralph-starter config @@ -1765,6 +1671,119 @@ These environment variables override config file values: --- +## fix + +# ralph-starter fix + +Fix build errors, lint issues, or design problems. + +## Synopsis + +```bash +ralph-starter fix [task] [options] +``` + +## Description + +The `fix` command runs a focused AI loop to fix project issues. It scans for build, lint, typecheck, and test failures, then orchestrates a coding agent to fix them automatically. + +When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: + +- Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) +- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots + +## Arguments + +| Argument | Description | +|----------|-------------| +| `task` | Optional description of what to fix. If not provided, scans for build/lint errors. 
| + +## Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--agent ` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | +| `--commit` | Auto-commit the fix | false | +| `--max-iterations ` | Maximum fix iterations | 3 | +| `--output-dir ` | Project directory | cwd | + +## Examples + +### Fix Build Errors + +```bash +# Auto-detect and fix build/lint errors +ralph-starter fix + +# Force full project scan +ralph-starter fix --scan +``` + +### Fix Design Issues + +```bash +# Fix visual/CSS problems +ralph-starter fix "fix the paddings and make the colors brighter" + +# Fix responsive layout +ralph-starter fix "make the layout responsive on mobile" + +# Fix color theme +ralph-starter fix "change the color scheme to darker tones" +``` + +### With Options + +```bash +# Auto-commit the fix +ralph-starter fix --scan --commit + +# Use a specific agent +ralph-starter fix "fix lint errors" --agent claude-code + +# Allow more iterations for complex fixes +ralph-starter fix "fix all test failures" --max-iterations 5 +``` + +## Behavior + +1. **Error Detection**: + - If `task` provided → runs build check for baseline, then fixes the described issue + - If no task and previous failures exist → re-runs failed validations from `.ralph/activity.md` + - If `--scan` → runs full validation suite (build + lint + typecheck + tests) + +2. **Skill Detection**: + - Detects installed Claude Code skills relevant to the task + - For CSS/design tasks → auto-applies design skills and adds visual verification instructions + - Searches skills.sh for complementary skills if needed + +3. **Fix Loop**: + - Agent works on fixing issues (default: 3 iterations) + - Lint checks run between iterations (fast feedback) + - Full build check runs on final iteration + - If build fails on final iteration → extends loop by 2 extra iterations + +4. 
**Verification**: + - Re-runs original validation commands after the loop + - Reports success only if all checks pass (not just agent completion) + +## Exit Codes + +| Code | Description | +|------|-------------| +| 0 | All issues fixed | +| 1 | Could not fix all issues automatically | + +## See Also + +- [ralph-starter run](/docs/cli/run) +- [ralph-starter skill](/docs/cli/skill) +- [Validation](/docs/advanced/validation) +- [Skills System](/docs/guides/skills-system) + +--- + ## init # ralph-starter init @@ -3035,6 +3054,18 @@ installed skills from three locations: Detected skills are matched against the project's tech stack and included in the agent's prompt context when relevant. +## Auto Skill Discovery + +Auto skill discovery is enabled by default. ralph-starter +queries the skills.sh API to find and install relevant +skills automatically before each run. + +To disable it, set: + +```bash +RALPH_DISABLE_SKILL_AUTO_INSTALL=1 +``` + ## Behavior - The `add` action uses `npx add-skill` under the hood. @@ -7988,7 +8019,7 @@ $ ralph-starter ❯ Yes, I know what I want to build No, help me brainstorm ideas -? What's your idea for today? +? Which idea do you want to build? (e.g., "a habit tracker app" or "an API for managing recipes") > a personal finance tracker @@ -8012,7 +8043,7 @@ $ ralph-starter Complexity: Working MVP -? Does this look right? +? Is this the right specs? ❯ Yes, let's build it! 
I want to change something Start over with a different idea diff --git a/docs/static/llms.txt b/docs/static/llms.txt index 1f5973c..7c1116a 100644 --- a/docs/static/llms.txt +++ b/docs/static/llms.txt @@ -14,8 +14,8 @@ This file contains links to documentation sections following the llmstxt.org sta - [auth](https://ralphstarter.ai/docs/cli/auth): Browser-based OAuth authentication for integrations - [auto](https://ralphstarter.ai/docs/cli/auto): Autonomous batch task processing from GitHub and Linear - [check](https://ralphstarter.ai/docs/cli/check): Validate configuration and test LLM connection -- [fix](https://ralphstarter.ai/docs/cli/fix): Fix build errors, lint issues, or design problems - [config](https://ralphstarter.ai/docs/cli/config): Manage source configuration and credentials +- [fix](https://ralphstarter.ai/docs/cli/fix): Fix build errors, lint issues, or design problems - [init](https://ralphstarter.ai/docs/cli/init): Initialize Ralph Playbook in a project - [integrations](https://ralphstarter.ai/docs/cli/integrations): Manage, test, and fetch data from integrations - [plan](https://ralphstarter.ai/docs/cli/plan): Create implementation plan from specs diff --git a/docs/static/sidebar.json b/docs/static/sidebar.json index 83b7ee3..0b834b0 100644 --- a/docs/static/sidebar.json +++ b/docs/static/sidebar.json @@ -1,7 +1,7 @@ { "name": "ralph-starter Documentation", "baseUrl": "https://ralphstarter.ai", - "generatedAt": "2026-02-09T23:59:04.120Z", + "generatedAt": "2026-02-13T21:19:43.300Z", "navigation": [ { "type": "category", @@ -159,6 +159,14 @@ "markdownUrl": "https://ralphstarter.ai/docs/cli/config.md", "description": "Manage source configuration and credentials" }, + { + "type": "doc", + "label": "fix", + "path": "/docs/cli/fix", + "url": "https://ralphstarter.ai/docs/cli/fix", + "markdownUrl": "https://ralphstarter.ai/docs/cli/fix.md", + "description": "Fix build errors, lint issues, or design problems" + }, { "type": "doc", "label": "init", @@ 
-419,6 +427,12 @@ "url": "https://ralphstarter.ai/docs/cli/config", "category": "Cli" }, + { + "title": "fix", + "path": "/docs/cli/fix", + "url": "https://ralphstarter.ai/docs/cli/fix", + "category": "Cli" + }, { "title": "init", "path": "/docs/cli/init", diff --git a/docs/static/sitemap.xml b/docs/static/sitemap.xml index 8bc72eb..c727c4f 100644 --- a/docs/static/sitemap.xml +++ b/docs/static/sitemap.xml @@ -1 +1 @@ -https://ralphstarter.ai/integrationsdaily0.7https://ralphstarter.ai/markdown-pagedaily0.7https://ralphstarter.ai/templatesdaily0.7https://ralphstarter.ai/use-casesdaily0.7https://ralphstarter.ai/docs/advanced/circuit-breakerdaily0.7https://ralphstarter.ai/docs/advanced/git-automationdaily0.7https://ralphstarter.ai/docs/advanced/ralph-playbookdaily0.7https://ralphstarter.ai/docs/advanced/rate-limitingdaily0.7https://ralphstarter.ai/docs/advanced/validationdaily0.7https://ralphstarter.ai/docs/cli/authdaily0.7https://ralphstarter.ai/docs/cli/autodaily0.7https://ralphstarter.ai/docs/cli/checkdaily0.7https://ralphstarter.ai/docs/cli/configdaily0.7https://ralphstarter.ai/docs/cli/initdaily0.7https://ralphstarter.ai/docs/cli/integrationsdaily0.7https://ralphstarter.ai/docs/cli/plandaily0.7https://ralphstarter.ai/docs/cli/presetsdaily0.7https://ralphstarter.ai/docs/cli/rundaily0.7https://ralphstarter.ai/docs/cli/setupdaily0.7https://ralphstarter.ai/docs/cli/skilldaily0.7https://ralphstarter.ai/docs/cli/sourcedaily0.7https://ralphstarter.ai/docs/cli/templatedaily0.7https://ralphstarter.ai/docs/community/changelogdaily0.7https://ralphstarter.ai/docs/community/contributingdaily0.7https://ralphstarter.ai/docs/community/ideasdaily0.7https://ralphstarter.ai/docs/faqdaily0.7https://ralphstarter.ai/docs/guides/cost-trackingdaily0.7https://ralphstarter.ai/docs/guides/extending-ralph-starterdaily0.7https://ralphstarter.ai/docs/guides/prd-workflowdaily0.7https://ralphstarter.ai/docs/guides/skills-systemdaily0.7https://ralphstarter.ai/docs/guides/testing-integrationsdail
y0.7https://ralphstarter.ai/docs/guides/workflow-presetsdaily0.7https://ralphstarter.ai/docs/installationdaily0.7https://ralphstarter.ai/docs/introdaily0.7https://ralphstarter.ai/docs/mcp/claude-desktopdaily0.7https://ralphstarter.ai/docs/mcp/setupdaily0.7https://ralphstarter.ai/docs/sources/figmadaily0.7https://ralphstarter.ai/docs/sources/githubdaily0.7https://ralphstarter.ai/docs/sources/lineardaily0.7https://ralphstarter.ai/docs/sources/notiondaily0.7https://ralphstarter.ai/docs/sources/overviewdaily0.7https://ralphstarter.ai/docs/wizard/idea-modedaily0.7https://ralphstarter.ai/docs/wizard/overviewdaily0.7https://ralphstarter.ai/daily0.7 \ No newline at end of file +https://ralphstarter.ai/integrationsdaily0.7https://ralphstarter.ai/markdown-pagedaily0.7https://ralphstarter.ai/templatesdaily0.7https://ralphstarter.ai/use-casesdaily0.7https://ralphstarter.ai/docs/advanced/circuit-breakerdaily0.7https://ralphstarter.ai/docs/advanced/git-automationdaily0.7https://ralphstarter.ai/docs/advanced/ralph-playbookdaily0.7https://ralphstarter.ai/docs/advanced/rate-limitingdaily0.7https://ralphstarter.ai/docs/advanced/validationdaily0.7https://ralphstarter.ai/docs/cli/authdaily0.7https://ralphstarter.ai/docs/cli/autodaily0.7https://ralphstarter.ai/docs/cli/checkdaily0.7https://ralphstarter.ai/docs/cli/configdaily0.7https://ralphstarter.ai/docs/cli/fixdaily0.7https://ralphstarter.ai/docs/cli/initdaily0.7https://ralphstarter.ai/docs/cli/integrationsdaily0.7https://ralphstarter.ai/docs/cli/plandaily0.7https://ralphstarter.ai/docs/cli/presetsdaily0.7https://ralphstarter.ai/docs/cli/rundaily0.7https://ralphstarter.ai/docs/cli/setupdaily0.7https://ralphstarter.ai/docs/cli/skilldaily0.7https://ralphstarter.ai/docs/cli/sourcedaily0.7https://ralphstarter.ai/docs/cli/templatedaily0.7https://ralphstarter.ai/docs/community/changelogdaily0.7https://ralphstarter.ai/docs/community/contributingdaily0.7https://ralphstarter.ai/docs/community/ideasdaily0.7https://ralphstarter.ai/docs/faqdaily
0.7https://ralphstarter.ai/docs/guides/cost-trackingdaily0.7https://ralphstarter.ai/docs/guides/extending-ralph-starterdaily0.7https://ralphstarter.ai/docs/guides/prd-workflowdaily0.7https://ralphstarter.ai/docs/guides/skills-systemdaily0.7https://ralphstarter.ai/docs/guides/testing-integrationsdaily0.7https://ralphstarter.ai/docs/guides/workflow-presetsdaily0.7https://ralphstarter.ai/docs/installationdaily0.7https://ralphstarter.ai/docs/introdaily0.7https://ralphstarter.ai/docs/mcp/claude-desktopdaily0.7https://ralphstarter.ai/docs/mcp/setupdaily0.7https://ralphstarter.ai/docs/sources/figmadaily0.7https://ralphstarter.ai/docs/sources/githubdaily0.7https://ralphstarter.ai/docs/sources/lineardaily0.7https://ralphstarter.ai/docs/sources/notiondaily0.7https://ralphstarter.ai/docs/sources/overviewdaily0.7https://ralphstarter.ai/docs/wizard/idea-modedaily0.7https://ralphstarter.ai/docs/wizard/overviewdaily0.7https://ralphstarter.ai/daily0.7 \ No newline at end of file From af39b39c5f981ba590cd84522cb64081b4d73079 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 21:32:43 +0000 Subject: [PATCH 25/46] fix(skills): cap skill installation + filter prompt to top 5 relevant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add installation ceiling: skip API search when >=3 relevant skills exist - Filter formatSkillsForPrompt to only task-relevant skills, capped at 5 - Fix stall detection: lastValidationFeedback is a string (never null), so `!== null` was always true — use `!!` for correct falsy check Co-Authored-By: Claude Opus 4.6 --- src/loop/executor.ts | 2 +- src/loop/skills.ts | 26 +++++++++++++++++++------- src/skills/auto-install.ts | 11 ++++++++++- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/loop/executor.ts b/src/loop/executor.ts index d38fd8a..5420f66 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -797,7 +797,7 @@ export async function runLoop(options: LoopOptions): 
Promise { const postIterationTaskInfo = parsePlanTasks(options.cwd); const tasksProgressedThisIteration = postIterationTaskInfo.completed > previousCompletedTasks; // Build/validation failures are NOT idle — agent is actively debugging - const hadValidationFailure = lastValidationFeedback !== null; + const hadValidationFailure = !!lastValidationFeedback; const hasProductiveProgress = hasChanges || tasksProgressedThisIteration || hadValidationFailure; diff --git a/src/loop/skills.ts b/src/loop/skills.ts index 4662907..08eac64 100644 --- a/src/loop/skills.ts +++ b/src/loop/skills.ts @@ -315,21 +315,33 @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean { export function formatSkillsForPrompt(skills: ClaudeSkill[], task?: string): string { if (skills.length === 0) return ''; + const MAX_SKILLS_IN_PROMPT = 5; + + // When we have a task, only include relevant skills to avoid prompt bloat + let selected: ClaudeSkill[]; + if (task) { + const relevant = skills.filter((skill) => shouldAutoApplySkill(skill, task)); + selected = + relevant.length > 0 + ? 
relevant.slice(0, MAX_SKILLS_IN_PROMPT) + : skills.slice(0, MAX_SKILLS_IN_PROMPT); + } else { + selected = skills.slice(0, MAX_SKILLS_IN_PROMPT); + } + const lines = ['## Available Claude Code Skills', '']; - for (const skill of skills) { + for (const skill of selected) { lines.push(`- **${skill.name}**: ${skill.description || 'No description'}`); } lines.push(''); if (task) { - const autoApply = skills.filter((skill) => shouldAutoApplySkill(skill, task)); - if (autoApply.length > 0) { - const skillList = autoApply.map((skill) => `/${skill.name}`).join(', '); - lines.push(`Auto-apply these skills: ${skillList}`); - lines.push(''); - } + // All selected skills are already relevant — tell the agent to apply them + const skillList = selected.map((skill) => `/${skill.name}`).join(', '); + lines.push(`Auto-apply these skills: ${skillList}`); + lines.push(''); } lines.push('Use these skills when appropriate by invoking them with /skill-name.'); diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index 764de99..b3f5feb 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -275,7 +275,16 @@ export async function autoInstallSkillsFromTask(task: string, cwd: string): Prom console.log(chalk.cyan(`Using installed skills: ${names.join(', ')}`)); } - // Always search for complementary skills (even if some are installed) + // Skip API search when enough relevant skills are already installed + const SUFFICIENT_RELEVANT_SKILLS = 3; + if (relevantInstalled.length >= SUFFICIENT_RELEVANT_SKILLS) { + console.log( + chalk.green(`Sufficient skills installed (${relevantInstalled.length}), skipping search.`) + ); + return relevantInstalled.map((s) => s.name); + } + + // Search for complementary skills if we don't have enough relevant ones const queries = buildSkillQueries(task); if (queries.length === 0) return relevantInstalled.map((s) => s.name); From 167e707a214d780ca3a2a044a99d619a89c149f9 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 
Feb 2026 23:02:30 +0000 Subject: [PATCH 26/46] fix(context): spec-adherent preamble + include spec summary in iterations 2+ The context builder was dropping all spec content after iteration 1, causing "spec amnesia" where the agent lost sight of design requirements. Also, the preamble only had negative design guidance ("NEVER use...") with no positive instruction to follow the spec faithfully. - Add buildSpecSummary() to read specs/ directory for later iterations - Rewrite design section: spec is now "FIRST PRIORITY" source of truth - Include spec summary in iterations 2-3 and truncated hint in 4+ - Add dev server exception clause for visual verification flows Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 72 ++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index e9b4861..5565f34 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -7,6 +7,8 @@ * - Iterations 4+: Current task only + error summary */ +import { existsSync, readdirSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; import { estimateTokens } from './cost-tracker.js'; import type { PlanTask, TaskCount } from './task-counter.js'; @@ -27,6 +29,8 @@ export interface ContextBuildOptions { validationFeedback?: string; /** Maximum input tokens budget (0 = unlimited) */ maxInputTokens?: number; + /** Abbreviated spec summary for later iterations (avoids agent re-reading specs/) */ + specSummary?: string; } export interface BuiltContext { @@ -103,6 +107,43 @@ export function compressValidationFeedback(feedback: string, maxChars: number = return compressed.join('\n'); } +/** + * Build an abbreviated spec summary from the specs/ directory. + * Gives later iterations a quick design reference without requiring + * the agent to re-read spec files via tool calls. 
+ */ +export function buildSpecSummary(cwd: string, maxChars: number = 1500): string | undefined { + const specsDir = join(cwd, 'specs'); + if (!existsSync(specsDir)) return undefined; + + try { + const specFiles = readdirSync(specsDir).filter((f) => f.endsWith('.md')); + if (specFiles.length === 0) return undefined; + + const parts: string[] = []; + let totalLength = 0; + + for (const file of specFiles) { + const content = readFileSync(join(specsDir, file), 'utf-8'); + const available = maxChars - totalLength; + if (available <= 100) { + parts.push(`\n[${specFiles.length - parts.length} more spec file(s) omitted]`); + break; + } + const truncated = + content.length > available + ? `${content.slice(0, available)}\n[... truncated ...]` + : content; + parts.push(truncated); + totalLength += truncated.length; + } + + return parts.join('\n---\n'); + } catch { + return undefined; + } +} + /** * Build a trimmed implementation plan context showing only the current task * with a summary of completed and pending tasks. @@ -156,6 +197,7 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { iteration, validationFeedback, maxInputTokens = 0, + specSummary, } = opts; const totalTasks = taskInfo.total; @@ -175,7 +217,7 @@ Rules: - Don't assume functionality is not already implemented — search the codebase first - Implement completely — no placeholders or stubs - Create files before importing them — never import components or modules that don't exist yet -- Do NOT run build or dev server commands yourself — the loop automatically runs lint checks between iterations and a full build on the final iteration. NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) — it blocks forever and wastes resources +- Do NOT run build or dev server commands yourself — the loop automatically runs lint checks between iterations and a full build on the final iteration. NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) 
— it blocks forever and wastes resources. (Exception: if explicitly told to do visual verification, you may briefly start a dev server and MUST kill it when done.) - When ALL tasks are complete, explicitly state "All tasks completed" - If you learn how to run/build the project, update AGENTS.md @@ -188,13 +230,11 @@ Technology gotchas (CRITICAL — follow these exactly): - JSX: Never put unescaped quotes inside attribute strings. For SVG backgrounds or data URLs, use a CSS file or encodeURIComponent(). - Do NOT run \`npm run build\` or \`npm run dev\` manually — the loop handles validation automatically (lint between tasks, full build at the end). -Design quality (IMPORTANT — avoid generic AI aesthetics): -- NEVER use purple-blue gradient backgrounds or gradient text — this is the #1 AI design tell -- NEVER default to Inter, Roboto, or system fonts — pick distinctive typography (e.g. DM Sans, Playfair Display, Space Mono) -- NEVER use glass morphism, neumorphism, or frosted-glass cards -- Choose ONE clear design direction (bold/minimal/retro/editorial/playful) and commit to it +Design quality (IMPORTANT): +- FIRST PRIORITY: If specs/ contains a design specification, follow it EXACTLY — match the described colors, spacing, layout, typography, and visual style faithfully. The spec is the source of truth. +- If no spec exists, choose ONE clear design direction (bold/minimal/retro/editorial/playful) and commit to it - Use a specific color palette with max 3-4 colors, not rainbow gradients -- Prefer flat or subtle shadows over glassmorphism effects +- Avoid generic AI aesthetics: no purple-blue gradient backgrounds/text, no glass morphism/neumorphism, no Inter/Roboto defaults — pick distinctive typography (e.g. 
DM Sans, Playfair Display, Space Mono) `; // No structured tasks — pass the task with preamble @@ -233,12 +273,15 @@ Complete these subtasks, then mark them done in IMPLEMENTATION_PLAN.md by changi debugParts.push('mode=full (iteration 1)'); debugParts.push(`included: preamble + full spec + skills + task ${taskNum}/${totalTasks}`); } else if (iteration <= 3) { - // Iterations 2-3: Preamble + trimmed plan context + spec reference + // Iterations 2-3: Preamble + trimmed plan context + spec summary const planContext = buildTrimmedPlanContext(currentTask, taskInfo); + const specRef = specSummary + ? `\n## Spec Summary (reference — follow this faithfully)\n${specSummary}\n` + : '\nStudy specs/ for requirements if needed.'; prompt = `${preamble} -Continue working on the project. Study specs/ for requirements if needed. Check IMPLEMENTATION_PLAN.md for full progress. - +Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. +${specRef} ${planContext}`; // Add compressed validation feedback if present @@ -252,12 +295,15 @@ ${planContext}`; debugParts.push(`mode=trimmed (iteration ${iteration})`); debugParts.push(`excluded: full spec, skills`); } else { - // Iterations 4+: Preamble + minimal context + // Iterations 4+: Preamble + minimal context + truncated spec hint const planContext = buildTrimmedPlanContext(currentTask, taskInfo); + const specHint = specSummary + ? `\nSpec key points:\n${specSummary.slice(0, 500)}${specSummary.length > 500 ? '\n[... see specs/ for full details ...]' : ''}\n` + : '\nSpecs in specs/.'; prompt = `${preamble} -Continue working on the project. Specs in specs/. Check IMPLEMENTATION_PLAN.md for progress. - +Continue working on the project. Check IMPLEMENTATION_PLAN.md for progress. 
+${specHint} ${planContext}`; // Add heavily compressed validation feedback if present From c1851058a9725c389421c950abe307e2c3b1a420 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:06:39 +0000 Subject: [PATCH 27/46] fix(loop): pass spec summary to context builder for iterations 2+ Wire up buildSpecSummary() in the executor so the context builder can include abbreviated spec content in later iterations, preventing the agent from losing sight of design requirements. Co-Authored-By: Claude Opus 4.6 --- src/loop/executor.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 5420f66..1708a26 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -24,7 +24,7 @@ import { } from '../utils/rate-limit-display.js'; import { type Agent, type AgentRunOptions, runAgent } from './agents.js'; import { CircuitBreaker, type CircuitBreakerConfig } from './circuit-breaker.js'; -import { buildIterationContext } from './context-builder.js'; +import { buildIterationContext, buildSpecSummary } from './context-builder.js'; import { CostTracker, type CostTrackerStats, formatCost } from './cost-tracker.js'; import { estimateLoop, formatEstimateDetailed } from './estimator.js'; import { checkFileBasedCompletion, createProgressTracker, type ProgressEntry } from './progress.js'; @@ -453,6 +453,9 @@ export async function runLoop(options: LoopOptions): Promise { taskWithSkills = `${options.task}\n\n${skillsPrompt}`; } + // Build abbreviated spec summary for context builder (iterations 2+) + const specSummary = buildSpecSummary(options.cwd); + // Track validation feedback separately — don't mutate taskWithSkills // initialValidationFeedback lets the `fix` command pre-populate errors for iteration 1 let lastValidationFeedback = options.initialValidationFeedback || ''; @@ -700,6 +703,7 @@ export async function runLoop(options: LoopOptions): Promise { maxIterations, validationFeedback: 
lastValidationFeedback || undefined, maxInputTokens: options.contextBudget || 0, + specSummary, }); const iterationTask = builtContext.prompt; From 3c941a20907a88153a8a76471e50c3c376e542be Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:07:12 +0000 Subject: [PATCH 28/46] feat(skills): add maxSkills parameter to formatSkillsForPrompt Allow callers to cap the number of skills included in the prompt. Used by the --design flag to limit to 3-4 focused design skills instead of the default 5. Co-Authored-By: Claude Opus 4.6 --- src/loop/skills.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/loop/skills.ts b/src/loop/skills.ts index 08eac64..eb2b654 100644 --- a/src/loop/skills.ts +++ b/src/loop/skills.ts @@ -312,10 +312,14 @@ function shouldAutoApplySkill(skill: ClaudeSkill, task: string): boolean { return false; } -export function formatSkillsForPrompt(skills: ClaudeSkill[], task?: string): string { +export function formatSkillsForPrompt( + skills: ClaudeSkill[], + task?: string, + maxSkills?: number +): string { if (skills.length === 0) return ''; - const MAX_SKILLS_IN_PROMPT = 5; + const MAX_SKILLS_IN_PROMPT = maxSkills || 5; // When we have a task, only include relevant skills to avoid prompt bloat let selected: ClaudeSkill[]; From bb6c1bec4364e117f8c5db7155df114f2803e46b Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:09:58 +0000 Subject: [PATCH 29/46] feat(fix): add --design flag for visual-first design fix flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fix command was losing all original design context — the agent only saw the custom task string and build errors, never the spec or plan. This caused design fixes to be guesswork rather than spec-adherent. 
Changes: - Include specs/ and IMPLEMENTATION_PLAN.md content in fixTask so the agent knows what "correct" looks like - Add --design flag: structured screenshot → analyze → plan → fix flow with 3 viewport breakpoints (desktop/tablet/mobile) - Bump default iterations: 7 for --design, 5 for design keywords, 3 default - Clarify dev server override for visual verification - Register --design option in CLI Co-Authored-By: Claude Opus 4.6 --- src/cli.ts | 1 + src/commands/fix.ts | 90 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index c54d695..4e99bdc 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -114,6 +114,7 @@ program .option('--commit', 'Auto-commit the fix') .option('--max-iterations ', 'Max fix iterations (default: 3)') .option('--output-dir ', 'Project directory (default: cwd)') + .option('--design', 'Visual-first design fix: screenshot, analyze, plan, and fix design issues') .action(fixCommand); // ralph-starter init - Initialize Ralph in a project diff --git a/src/commands/fix.ts b/src/commands/fix.ts index 25baf69..1651c68 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, readdirSync, readFileSync } from 'node:fs'; import { join } from 'node:path'; import chalk from 'chalk'; import ora from 'ora'; @@ -19,6 +19,7 @@ interface FixOptions { maxIterations?: string; outputDir?: string; scan?: boolean; + design?: boolean; } /** @@ -149,6 +150,44 @@ export async function fixCommand(customTask: string | undefined, options: FixOpt 'Fix all project issues found by the scan below. Prioritize: build errors first, then type errors, then lint violations, then test failures. 
Make minimal, focused fixes.'; } + // Include original spec context so the agent knows what "correct" looks like + const specsDir = join(cwd, 'specs'); + const planPath = join(cwd, 'IMPLEMENTATION_PLAN.md'); + let specContext = ''; + + if (existsSync(specsDir)) { + try { + const specFiles = readdirSync(specsDir).filter((f) => f.endsWith('.md')); + for (const file of specFiles) { + const content = readFileSync(join(specsDir, file), 'utf-8'); + const truncated = + content.length > 3000 + ? `${content.slice(0, 3000)}\n\n[... spec truncated for brevity ...]` + : content; + specContext += `\n### Spec: ${file}\n${truncated}\n`; + } + } catch { + // Specs directory unreadable + } + } + + if (existsSync(planPath)) { + try { + const planContent = readFileSync(planPath, 'utf-8'); + const planSummary = + planContent.length > 2000 + ? `${planContent.slice(0, 2000)}\n\n[... plan truncated ...]` + : planContent; + specContext += `\n### Implementation Plan\n${planSummary}\n`; + } catch { + // Plan file unreadable + } + } + + if (specContext) { + fixTask = `${fixTask}\n\n## Original Design Specification\n\nIMPORTANT: Use the following specification as the source of truth for what the design should look like. Match the described colors, spacing, layout, and styling exactly.\n${specContext}`; + } + // For design/visual tasks, add instructions to visually verify with screenshots const DESIGN_KEYWORDS = [ 'css', @@ -176,20 +215,53 @@ export async function fixCommand(customTask: string | undefined, options: FixOpt 'visual', ]; const isDesignTask = - customTask && DESIGN_KEYWORDS.some((kw) => customTask.toLowerCase().includes(kw)); - if (isDesignTask) { - fixTask += `\n\nVisual verification (IMPORTANT): + options.design || + (customTask && DESIGN_KEYWORDS.some((kw) => customTask.toLowerCase().includes(kw))); + + // --design flag: structured visual-first fix flow + if (options.design) { + fixTask = `You are fixing design and visual issues in this project. 
Follow this structured methodology: + +## Phase 1: Visual Audit +1. Start the dev server (e.g. \`npm run dev\` or \`npx vite\`) — this OVERRIDES the "no dev server" rule +2. Take full-page screenshots at 3 viewports: desktop (1440px), tablet (768px), mobile (375px) +3. Analyze each screenshot carefully against the spec below + +## Phase 2: Issue Identification +List ALL design issues you find: +- Layout/spacing problems (misalignment, excess whitespace, overflow) +- Typography issues (wrong fonts, sizes, weights, line-heights) +- Color mismatches (wrong palette, poor contrast, inconsistent usage) +- Responsive breakage (elements overlapping, content clipping, bad stacking) +- Component styling (borders, shadows, padding, margins) + +## Phase 3: Fix Plan +Create a DESIGN_FIX_PLAN.md with prioritized issues and specific CSS/component fixes for each. + +## Phase 4: Execute & Verify +1. Fix issues one by one, starting with layout/structure, then typography, then colors +2. After each major fix, re-screenshot to verify improvement +3. Final verification: screenshot all 3 viewports and confirm all issues are resolved + +## Phase 5: Cleanup +CRITICAL: Stop the dev server (kill the process) when done — do NOT leave it running. + +${customTask ? `\nUser notes: ${customTask}\n` : ''}${specContext ? `\n## Original Design Specification\n${specContext}` : ''}${feedback ? `\n\n## Build Errors (also fix these)\n${feedback}` : ''}`; + } else if (isDesignTask) { + fixTask += `\n\nVisual verification (IMPORTANT — OVERRIDES the "no dev server" rule): This is a visual/design task. After making your CSS and styling changes, you MUST visually verify the result: -1. Start a local dev server briefly (exception to the "no dev server" rule for visual checks) -2. Use the /web-design-reviewer skill to take browser screenshots at desktop and mobile viewports -3. Review the screenshots and fix any visual issues you spot (spacing, colors, alignment, contrast) -4. 
Stop the dev server when done verifying`; +1. Start a local dev server (e.g. npm run dev) — this is the ONE exception to the "never start a dev server" rule +2. Take browser screenshots at desktop (1440px) and mobile (375px) viewports +3. Compare screenshots against the spec above — check colors, spacing, layout, and typography match +4. Fix any visual issues you spot (spacing, colors, alignment, contrast) +5. CRITICAL: Stop the dev server (kill the process) when done — do NOT leave it running`; } // Install relevant skills so the agent has design/quality context await autoInstallSkillsFromTask(fixTask, cwd); - const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : 3; + const defaultIter = options.design ? 7 : isDesignTask ? 5 : 3; + const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : defaultIter; const result = await runLoop({ task: fixTask, From f92b1bc2fb75eb9473fca2f3fc2f18ea45d10d50 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:13:33 +0000 Subject: [PATCH 30/46] fix(validation): use detected package manager in detectBuildCommands detectBuildCommands was hardcoding `npm run build` instead of using the project's actual package manager (pnpm/yarn/bun). This caused build validation to fail in projects that enforce a specific pm. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/validation.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/loop/validation.ts b/src/loop/validation.ts index 1738294..c44a9ed 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -202,12 +202,15 @@ export function detectBuildCommands(cwd: string): ValidationCommand[] { try { const pkg = JSON.parse(readFileSync(packagePath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); if (scripts.build) { - commands.push({ name: 'build', command: 'npm', args: ['run', 'build'] }); + const cmd = getRunCommand(pm, 'build'); + commands.push({ name: 'build', ...cmd }); } if (scripts.typecheck) { - commands.push({ name: 'typecheck', command: 'npm', args: ['run', 'typecheck'] }); + const cmd = getRunCommand(pm, 'typecheck'); + commands.push({ name: 'typecheck', ...cmd }); } } catch { // Invalid package.json From daeacd233c25884abd0b26f9cee4a75d65ff5cce Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:16:20 +0000 Subject: [PATCH 31/46] fix(fix): don't bail early when --design flag is set The fix command was exiting with "nothing to fix!" when build checks passed and no custom task was given. But --design targets visual issues that build checks can't detect, so it should always proceed to the screenshot/analysis flow. 
Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index 1651c68..163100b 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -82,7 +82,7 @@ export async function fixCommand(customTask: string | undefined, options: FixOpt const results = await runAllValidations(cwd, commands); const failures = results.filter((r) => !r.success); - if (failures.length === 0 && !customTask) { + if (failures.length === 0 && !customTask && !options.design) { spinner.succeed(chalk.green('All checks passed — nothing to fix!')); return; } @@ -99,7 +99,7 @@ export async function fixCommand(customTask: string | undefined, options: FixOpt spinner.succeed(chalk.green('Build passing')); } console.log(); - } else if (!customTask) { + } else if (!customTask && !options.design) { console.log(chalk.yellow('No build/lint/test commands detected in this project.')); return; } From 63c4a79a5076eaca7acbeb55e830df44eb5caca8 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:20:27 +0000 Subject: [PATCH 32/46] fix(fix): cap skills to 4 in --design mode + screenshot-first prompt Three issues fixed: - Skills were showing "25 detected" because maxSkills wasn't threaded through LoopOptions to formatSkillsForPrompt. Now --design caps to 4. 
- Startup display now shows "4 active (25 installed)" instead of raw count - Design prompt now forcefully instructs the agent to start with dev server + screenshots as the VERY FIRST action, ignoring IMPLEMENTATION_PLAN.md Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 7 +++++-- src/loop/executor.ts | 12 +++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index 163100b..d994d80 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -220,9 +220,11 @@ export async function fixCommand(customTask: string | undefined, options: FixOpt // --design flag: structured visual-first fix flow if (options.design) { - fixTask = `You are fixing design and visual issues in this project. Follow this structured methodology: + fixTask = `You are fixing design and visual issues in this project. Ignore IMPLEMENTATION_PLAN.md — this is a visual fix pass, not a feature build. -## Phase 1: Visual Audit +IMPORTANT: Your VERY FIRST action must be to start the dev server and take screenshots. Do NOT read files or explore the codebase first — start visually. + +## Phase 1: Visual Audit (DO THIS FIRST) 1. Start the dev server (e.g. \`npm run dev\` or \`npx vite\`) — this OVERRIDES the "no dev server" rule 2. Take full-page screenshots at 3 viewports: desktop (1440px), tablet (768px), mobile (375px) 3. Analyze each screenshot carefully against the spec below @@ -274,6 +276,7 @@ This is a visual/design task. After making your CSS and styling changes, you MUS trackProgress: true, checkFileCompletion: false, validate: mode === 'scan', + maxSkills: options.design ? 
4 : undefined, }); // --- Step 5: Verify fix by re-running validations --- diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 1708a26..bf76949 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -236,6 +236,7 @@ export interface LoopOptions { maxCost?: number; // Maximum cost in USD before stopping (0 = unlimited) agentTimeout?: number; // Agent timeout in milliseconds (default: 300000 = 5 min) initialValidationFeedback?: string; // Pre-populate with errors (used by `fix` command) + maxSkills?: number; // Cap skills included in prompt (default: 5) } export interface LoopResult { @@ -445,11 +446,11 @@ export async function runLoop(options: LoopOptions): Promise { // Lightweight lint for intermediate iterations (build only runs on final iteration) let lintCommands = detectLintCommands(options.cwd); - // Detect Claude Code skills + // Detect Claude Code skills (capped by maxSkills option) const detectedSkills = detectClaudeSkills(options.cwd); let taskWithSkills = options.task; if (detectedSkills.length > 0) { - const skillsPrompt = formatSkillsForPrompt(detectedSkills, options.task); + const skillsPrompt = formatSkillsForPrompt(detectedSkills, options.task, options.maxSkills); taskWithSkills = `${options.task}\n\n${skillsPrompt}`; } @@ -484,7 +485,12 @@ export async function runLoop(options: LoopOptions): Promise { startupLines.push(` Auto-commit: ${chalk.green('enabled')}`); } if (detectedSkills.length > 0) { - startupLines.push(` Skills: ${chalk.white(`${detectedSkills.length} detected`)}`); + const effectiveSkills = options.maxSkills + ? 
Math.min(detectedSkills.length, options.maxSkills) + : Math.min(detectedSkills.length, 5); + startupLines.push( + ` Skills: ${chalk.white(`${effectiveSkills} active (${detectedSkills.length} installed)`)}` + ); } if (rateLimiter) { startupLines.push(` Rate limit: ${chalk.white(`${options.rateLimit}/hour`)}`); From d3b9fe59332f5225a39c715f75d122d05a5a784c Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:22:22 +0000 Subject: [PATCH 33/46] fix(fix): rewrite --design prompt to catch structural issues first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The design fix prompt was too vague — "layout/spacing problems" led the agent to suggest padding tweaks instead of catching obvious structural issues like content not being centered or huge empty gaps. Rewritten Phase 2 (Issue Identification) to: - Prioritize page structure (centering, containers, max-width) over cosmetic - Check for content pinned to edges, broken grid layouts, unbalanced columns - Require CONCRETE issues visible in screenshots, not generic improvements Rewritten Phase 3 (Fix Plan) to: - Require exact file + CSS property for each fix - Focus on minimal fixes, not redesigning entire components Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index d994d80..fa6ede3 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -229,21 +229,30 @@ IMPORTANT: Your VERY FIRST action must be to start the dev server and take scree 2. Take full-page screenshots at 3 viewports: desktop (1440px), tablet (768px), mobile (375px) 3. 
Analyze each screenshot carefully against the spec below -## Phase 2: Issue Identification -List ALL design issues you find: -- Layout/spacing problems (misalignment, excess whitespace, overflow) -- Typography issues (wrong fonts, sizes, weights, line-heights) -- Color mismatches (wrong palette, poor contrast, inconsistent usage) -- Responsive breakage (elements overlapping, content clipping, bad stacking) -- Component styling (borders, shadows, padding, margins) +## Phase 2: Issue Identification (be SPECIFIC, not generic) +Look at the screenshots and identify CONCRETE issues you can actually see. Do NOT list generic improvements — only list problems visible in the screenshots. + +Check in this priority order: +1. **Page structure** — Is content centered? Are sections contained in a max-width wrapper? Is anything stuck to the left/right edge when it shouldn't be? Are there huge empty gaps between sections? +2. **Layout & positioning** — Are grid/flex layouts rendering correctly? Are columns balanced? Is the hero section properly structured? Are elements overlapping or misaligned? +3. **Responsive issues** — Does the layout break at any viewport? Do elements overflow or get clipped? +4. **Spacing** — Is vertical rhythm consistent between sections? Are there abnormally large or small gaps? +5. **Typography & colors** — Are fonts loading? Is text readable against backgrounds? Are colors consistent? + +IMPORTANT: Focus on what looks BROKEN, not what could be "improved." A centered layout with wrong padding is lower priority than content pinned to the left edge. ## Phase 3: Fix Plan -Create a DESIGN_FIX_PLAN.md with prioritized issues and specific CSS/component fixes for each. +Create a DESIGN_FIX_PLAN.md. 
For each issue: +- Describe EXACTLY what's wrong (e.g., "Hero content is not centered — text hugs the left edge with no container") +- Specify the exact file and CSS property to change +- Keep fixes minimal — fix the actual problem, don't redesign the entire component + +Prioritize: page structure > layout positioning > responsive > spacing > cosmetic. ## Phase 4: Execute & Verify -1. Fix issues one by one, starting with layout/structure, then typography, then colors -2. After each major fix, re-screenshot to verify improvement -3. Final verification: screenshot all 3 viewports and confirm all issues are resolved +1. Fix structural issues FIRST (containers, centering, grid layout), then work down to cosmetic +2. After fixing each structural issue, re-screenshot to verify the layout improved +3. Final verification: screenshot all 3 viewports and confirm the page looks properly structured ## Phase 5: Cleanup CRITICAL: Stop the dev server (kill the process) when done — do NOT leave it running. From 378261d8b07190e7603cb0639bebeb77b5265465 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:54:48 +0000 Subject: [PATCH 34/46] fix(skills): reduce skill bloat in fix command Pass user's custom task text (not the full generated prompt) to autoInstallSkillsFromTask. The --design prompt contains dozens of CSS/design keywords that triggered excessive skill search queries, causing skills to accumulate globally (25+ after a few runs). Also lower MAX_SKILLS_TO_INSTALL from 5 to 3 to cap accumulation. Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 4 +++- src/skills/auto-install.ts | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index fa6ede3..db7351f 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -269,7 +269,9 @@ This is a visual/design task. 
After making your CSS and styling changes, you MUS } // Install relevant skills so the agent has design/quality context - await autoInstallSkillsFromTask(fixTask, cwd); + // Use the user's custom task (not the full generated prompt) to avoid keyword-spam + // that triggers excessive skill searches from the design prompt boilerplate + await autoInstallSkillsFromTask(customTask || (options.design ? 'design fix' : 'fix'), cwd); const defaultIter = options.design ? 7 : isDesignTask ? 5 : 3; const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : defaultIter; diff --git a/src/skills/auto-install.ts b/src/skills/auto-install.ts index b3f5feb..84c00b2 100644 --- a/src/skills/auto-install.ts +++ b/src/skills/auto-install.ts @@ -11,7 +11,7 @@ export interface SkillCandidate { score: number; } -const MAX_SKILLS_TO_INSTALL = 5; +const MAX_SKILLS_TO_INSTALL = 3; /** * Normalize a skill identifier for comparison. From 7813dc36c18ded95b1f9407a99f790df6092b01f Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:56:58 +0000 Subject: [PATCH 35/46] fix(loop): skip IMPLEMENTATION_PLAN.md instructions for fix --design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preamble said "Study IMPLEMENTATION_PLAN.md and work on ONE task" which directly conflicted with the --design prompt's "Ignore IMPLEMENTATION_PLAN.md — this is a visual fix pass." The preamble appeared first and won, confusing the agent. Add skipPlanInstructions option that replaces plan-related rules with "This is a fix/review pass" when active. Set from fix --design. 
Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 1 + src/loop/context-builder.ts | 14 +++++++++++--- src/loop/executor.ts | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index db7351f..e1973bb 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -288,6 +288,7 @@ This is a visual/design task. After making your CSS and styling changes, you MUS checkFileCompletion: false, validate: mode === 'scan', maxSkills: options.design ? 4 : undefined, + skipPlanInstructions: options.design, }); // --- Step 5: Verify fix by re-running validations --- diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 5565f34..04a7cda 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -31,6 +31,8 @@ export interface ContextBuildOptions { maxInputTokens?: number; /** Abbreviated spec summary for later iterations (avoids agent re-reading specs/) */ specSummary?: string; + /** Skip IMPLEMENTATION_PLAN.md instructions in preamble (used by fix --design) */ + skipPlanInstructions?: boolean; } export interface BuiltContext { @@ -198,6 +200,7 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { validationFeedback, maxInputTokens = 0, specSummary, + skipPlanInstructions = false, } = opts; const totalTasks = taskInfo.total; @@ -206,14 +209,19 @@ export function buildIterationContext(opts: ContextBuildOptions): BuiltContext { let prompt: string; let wasTrimmed = false; + // Plan-related rules — omitted for fix/design passes where IMPLEMENTATION_PLAN.md is irrelevant + const planRules = skipPlanInstructions + ? '- This is a fix/review pass. Focus on the specific instructions in the task below.' 
+ : `- Study IMPLEMENTATION_PLAN.md and work on ONE task at a time +- Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done +- Study specs/ directory for original requirements`; + // Loop-aware preamble — gives the agent behavioral context per Ralph Playbook patterns const preamble = `You are a coding agent in an autonomous development loop (iteration ${iteration}/${opts.maxIterations}). Rules: - IMPORTANT: The current working directory IS the project root. Create ALL files here — do NOT create a subdirectory for the project (e.g., do NOT run \`mkdir my-app\` or \`npx create-vite my-app\`). If you use a scaffolding tool, run it with \`.\` as the target (e.g., \`npm create vite@latest . -- --template react\`). -- Study IMPLEMENTATION_PLAN.md and work on ONE task at a time -- Mark each subtask [x] in IMPLEMENTATION_PLAN.md immediately when done -- Study specs/ directory for original requirements +${planRules} - Don't assume functionality is not already implemented — search the codebase first - Implement completely — no placeholders or stubs - Create files before importing them — never import components or modules that don't exist yet diff --git a/src/loop/executor.ts b/src/loop/executor.ts index bf76949..27dc6e0 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -237,6 +237,7 @@ export interface LoopOptions { agentTimeout?: number; // Agent timeout in milliseconds (default: 300000 = 5 min) initialValidationFeedback?: string; // Pre-populate with errors (used by `fix` command) maxSkills?: number; // Cap skills included in prompt (default: 5) + skipPlanInstructions?: boolean; // Skip IMPLEMENTATION_PLAN.md rules in preamble (fix --design) } export interface LoopResult { @@ -710,6 +711,7 @@ export async function runLoop(options: LoopOptions): Promise { validationFeedback: lastValidationFeedback || undefined, maxInputTokens: options.contextBudget || 0, specSummary, + skipPlanInstructions: options.skipPlanInstructions, }); const iterationTask 
= builtContext.prompt; From 5036cb28d935a97d6c50683eeaf721e58703a148 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Fri, 13 Feb 2026 23:57:19 +0000 Subject: [PATCH 36/46] perf(fix): reduce default iterations for design tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix --design: 7 → 5 (5-phase structure should complete in 3-4 iters) isDesignTask: 5 → 4 (visual tasks with keyword detection) Reduces worst-case wall time from 35min to 25min. Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index e1973bb..1b4d56e 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -273,7 +273,7 @@ This is a visual/design task. After making your CSS and styling changes, you MUS // that triggers excessive skill searches from the design prompt boilerplate await autoInstallSkillsFromTask(customTask || (options.design ? 'design fix' : 'fix'), cwd); - const defaultIter = options.design ? 7 : isDesignTask ? 5 : 3; + const defaultIter = options.design ? 5 : isDesignTask ? 4 : 3; const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : defaultIter; const result = await runLoop({ From a65888756abc835ae7df7389827652e153eb7eec Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:00:27 +0000 Subject: [PATCH 37/46] feat(loop): add iteration log for inter-iteration memory Each iteration now appends a summary to .ralph/iteration-log.md with status (validation passed/failed), whether files changed, and agent summary text. On iterations 2+, the last 3 entries are included in the prompt as "## Previous Iterations" so the agent knows what was already tried and can avoid repeating failed approaches. This is a lightweight alternative to full session continuity (--resume) which is deferred to 0.3.1. 
Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 14 +++++++-- src/loop/executor.ts | 57 +++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 04a7cda..423f878 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -33,6 +33,8 @@ export interface ContextBuildOptions { specSummary?: string; /** Skip IMPLEMENTATION_PLAN.md instructions in preamble (used by fix --design) */ skipPlanInstructions?: boolean; + /** Iteration log content from .ralph/iteration-log.md (previous iteration summaries) */ + iterationLog?: string; } export interface BuiltContext { @@ -245,11 +247,17 @@ Design quality (IMPORTANT): - Avoid generic AI aesthetics: no purple-blue gradient backgrounds/text, no glass morphism/neumorphism, no Inter/Roboto defaults — pick distinctive typography (e.g. DM Sans, Playfair Display, Space Mono) `; + // Inject iteration log for iterations 2+ (gives agent memory of what happened before) + const iterationLogSection = + iteration > 1 && opts.iterationLog + ? `\n## Previous Iterations\n${opts.iterationLog}\nUse this history to avoid repeating failed approaches.\n` + : ''; + // No structured tasks — pass the task with preamble if (!currentTask || totalTasks === 0) { if (iteration > 1) { // Later iterations without structured tasks — remind agent to create a plan - prompt = `${preamble} + prompt = `${preamble}${iterationLogSection} Continue working on the project. If you haven't already, create an IMPLEMENTATION_PLAN.md with structured tasks. Study the specs/ directory for the original specification. @@ -287,7 +295,7 @@ Complete these subtasks, then mark them done in IMPLEMENTATION_PLAN.md by changi ? 
`\n## Spec Summary (reference — follow this faithfully)\n${specSummary}\n` : '\nStudy specs/ for requirements if needed.'; - prompt = `${preamble} + prompt = `${preamble}${iterationLogSection} Continue working on the project. Check IMPLEMENTATION_PLAN.md for full progress. ${specRef} ${planContext}`; @@ -309,7 +317,7 @@ ${planContext}`; ? `\nSpec key points:\n${specSummary.slice(0, 500)}${specSummary.length > 500 ? '\n[... see specs/ for full details ...]' : ''}\n` : '\nSpecs in specs/.'; - prompt = `${preamble} + prompt = `${preamble}${iterationLogSection} Continue working on the project. Check IMPLEMENTATION_PLAN.md for progress. ${specHint} ${planContext}`; diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 27dc6e0..0c28e6c 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -1,3 +1,4 @@ +import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs'; import { readdir, stat } from 'node:fs/promises'; import { join } from 'node:path'; import chalk from 'chalk'; @@ -360,6 +361,53 @@ function detectCompletionWithReason( return { status: 'continue', reason: '' }; } +/** + * Append an iteration summary to .ralph/iteration-log.md. + * Gives the agent inter-iteration memory without session continuity. + */ +function appendIterationLog( + cwd: string, + iteration: number, + summary: string, + validationPassed: boolean, + hasChanges: boolean +): void { + try { + const ralphDir = join(cwd, '.ralph'); + if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + + const logPath = join(ralphDir, 'iteration-log.md'); + const entry = `## Iteration ${iteration} +- Status: ${validationPassed ? 'validation passed' : 'validation failed'} +- Changes: ${hasChanges ? 'yes' : 'no files changed'} +- Summary: ${summary.slice(0, 200)} +`; + appendFileSync(logPath, entry); + } catch { + // Non-critical — don't break the loop if we can't write the log + } +} + +/** + * Read the last N iteration summaries from .ralph/iteration-log.md. 
+ * Used by context-builder to give the agent memory of previous iterations. + */ +export function readIterationLog(cwd: string, maxEntries = 3): string | undefined { + try { + const logPath = join(cwd, '.ralph', 'iteration-log.md'); + if (!existsSync(logPath)) return undefined; + + const content = readFileSync(logPath, 'utf-8'); + const entries = content.split(/^## Iteration /m).filter((e) => e.trim()); + if (entries.length === 0) return undefined; + + const recent = entries.slice(-maxEntries).map((e) => `## Iteration ${e}`); + return recent.join('\n'); + } catch { + return undefined; + } +} + function summarizeChanges(output: string): string { // Try to extract a meaningful summary from the output const lines = output.split('\n').filter((l) => l.trim()); @@ -701,6 +749,9 @@ export async function runLoop(options: LoopOptions): Promise { iterProgress.updateProgress(i, maxIterations, costTracker?.getStats()?.totalCost?.totalCost); // Build iteration-specific task with smart context windowing + // Read iteration log for inter-iteration memory (iterations 2+) + const iterationLog = i > 1 ? 
readIterationLog(options.cwd) : undefined; + const builtContext = buildIterationContext({ fullTask: options.task, taskWithSkills, @@ -712,6 +763,7 @@ export async function runLoop(options: LoopOptions): Promise { maxInputTokens: options.contextBudget || 0, specSummary, skipPlanInstructions: options.skipPlanInstructions, + iterationLog, }); const iterationTask = builtContext.prompt; @@ -1185,6 +1237,11 @@ export async function runLoop(options: LoopOptions): Promise { await progressTracker.appendEntry(progressEntry); } + // Write iteration summary for inter-iteration memory + const iterSummary = summarizeChanges(result.output); + const iterValidationPassed = validationResults.every((r) => r.success); + appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges); + if (status === 'done') { const completionReason = completionResult.reason || 'Task marked as complete by agent'; const duration = Date.now() - startTime; From 1516fac7139b8b3e9ca749cecc3261a91c4632f4 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:01:15 +0000 Subject: [PATCH 38/46] feat(loop): better header labels + subtask tree display - Loop header shows "Design Fix", "Fix", or agent name based on fixMode instead of always showing "Running Claude Code" - Subtask tree renders below header when current task has subtasks: [x] Create hero component [ ] Add responsive styles - Add fixMode option to LoopOptions ('design' | 'scan' | 'custom') Co-Authored-By: Claude Opus 4.6 --- src/commands/fix.ts | 1 + src/loop/executor.ts | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index 1b4d56e..3b3bd35 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -289,6 +289,7 @@ This is a visual/design task. After making your CSS and styling changes, you MUS validate: mode === 'scan', maxSkills: options.design ? 4 : undefined, skipPlanInstructions: options.design, + fixMode: options.design ? 
'design' : customTask ? 'custom' : 'scan', }); // --- Step 5: Verify fix by re-running validations --- diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 0c28e6c..257ac3e 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -239,6 +239,7 @@ export interface LoopOptions { initialValidationFeedback?: string; // Pre-populate with errors (used by `fix` command) maxSkills?: number; // Cap skills included in prompt (default: 5) skipPlanInstructions?: boolean; // Skip IMPLEMENTATION_PLAN.md rules in preamble (fix --design) + fixMode?: 'design' | 'scan' | 'custom'; // Display mode for fix command headers } export interface LoopResult { @@ -736,11 +737,26 @@ export async function runLoop(options: LoopOptions): Promise { chalk.dim(truncateToFit(` ${options.agent.name} │ Iter ${i}/${maxIterations}`, innerWidth)) ); } else { - const fallbackLine = ` ${sourceIcon} Loop ${i}/${maxIterations} │ Running ${options.agent.name}`; + const modeLabel = + options.fixMode === 'design' + ? 'Design Fix' + : options.fixMode + ? 'Fix' + : `Running ${options.agent.name}`; + const fallbackLine = ` ${sourceIcon} Loop ${i}/${maxIterations} │ ${modeLabel}`; headerLines.push(chalk.white.bold(truncateToFit(fallbackLine, innerWidth))); } console.log(); console.log(drawBox(headerLines, { color: chalk.cyan, width: boxWidth })); + + // Show subtask tree if current task has subtasks + if (currentTask?.subtasks && currentTask.subtasks.length > 0) { + for (const st of currentTask.subtasks) { + const icon = st.completed ? 
chalk.green(' [x]') : chalk.dim(' [ ]'); + const name = truncateToFit(cleanTaskName(st.name), innerWidth - 8); + console.log(`${icon} ${chalk.dim(name)}`); + } + } console.log(); // Create progress renderer for this iteration From aa225612cd414db30c47d58a4190ec94045faad1 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:01:59 +0000 Subject: [PATCH 39/46] fix(loop): kill orphaned dev servers after design iterations After each iteration in design mode, check ports 3000/5173/4321/8080 for orphaned dev server processes and SIGTERM them. This prevents resource leaks when the agent crashes or times out without cleaning up the dev server it started for visual verification. Co-Authored-By: Claude Opus 4.6 --- src/loop/executor.ts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 257ac3e..72c93ae 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -1,3 +1,4 @@ +import { execSync } from 'node:child_process'; import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs'; import { readdir, stat } from 'node:fs/promises'; import { join } from 'node:path'; @@ -839,6 +840,27 @@ export async function runLoop(options: LoopOptions): Promise { iterProgress.stop('Iteration complete'); + // Kill orphaned dev servers after design iterations (agent may crash without cleanup) + if (options.fixMode === 'design') { + try { + const pids = execSync('lsof -ti :3000,:5173,:4321,:8080 2>/dev/null', { + encoding: 'utf-8', + timeout: 3000, + }).trim(); + if (pids) { + for (const pid of pids.split('\n').filter(Boolean)) { + try { + process.kill(Number(pid), 'SIGTERM'); + } catch { + /* already dead */ + } + } + } + } catch { + /* no processes on those ports — normal */ + } + } + // Track cost for this iteration (silent - summary shown at end) if (costTracker) { costTracker.recordIteration(options.task, result.output); From 08e0169f42d5382e250daad23de61f5c89db0e95 Mon Sep 17 
00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:02:58 +0000 Subject: [PATCH 40/46] feat(session): persist validation feedback for resume Add lastValidationFeedback field to SessionState so that when a session is paused and later resumed, the agent gets the last validation errors as context. The resume command now passes this as initialValidationFeedback to runLoop. Co-Authored-By: Claude Opus 4.6 --- src/commands/resume.ts | 1 + src/loop/session.ts | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/commands/resume.ts b/src/commands/resume.ts index efc8446..fb0f09b 100644 --- a/src/commands/resume.ts +++ b/src/commands/resume.ts @@ -127,6 +127,7 @@ export async function resumeCommand(options: ResumeCommandOptions = {}): Promise checkFileCompletion: session.options.checkFileCompletion, trackCost: session.options.trackCost, model: session.options.model, + initialValidationFeedback: session.lastValidationFeedback, }; // Run the loop diff --git a/src/loop/session.ts b/src/loop/session.ts index be8429a..3716ab8 100644 --- a/src/loop/session.ts +++ b/src/loop/session.ts @@ -66,6 +66,8 @@ export interface SessionState { pauseReason?: string; /** Error message (if failed) */ error?: string; + /** Last validation feedback (preserved for resume) */ + lastValidationFeedback?: string; /** Exit reason */ exitReason?: | 'completed' @@ -197,7 +199,11 @@ export async function updateSessionIteration( /** * Pause the current session */ -export async function pauseSession(cwd: string, reason?: string): Promise { +export async function pauseSession( + cwd: string, + reason?: string, + validationFeedback?: string +): Promise { const session = await loadSession(cwd); if (!session) return null; @@ -205,6 +211,7 @@ export async function pauseSession(cwd: string, reason?: string): Promise Date: Sat, 14 Feb 2026 00:50:32 +0000 Subject: [PATCH 41/46] feat(wizard): add uiLibrary field to TechStack interface Add uiLibrary as an optional field in TechStack 
to support UI component library selection (shadcn/ui, shadcn-vue, shadcn-svelte, MUI, Chakra). Updated normalizeTechStack, hasTechStack, and the wizard summary display. Co-Authored-By: Claude Opus 4.6 --- src/wizard/index.ts | 1 + src/wizard/spec-generator.ts | 9 ++++++++- src/wizard/types.ts | 1 + src/wizard/ui.ts | 17 ++++++++++++++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/wizard/index.ts b/src/wizard/index.ts index 1ba4840..44e9f22 100644 --- a/src/wizard/index.ts +++ b/src/wizard/index.ts @@ -64,6 +64,7 @@ function normalizeTechStack(stack: WizardAnswers['techStack']): WizardAnswers['t backend: normalizeTechStackValue(stack.backend), database: normalizeTechStackValue(stack.database), styling: normalizeTechStackValue(stack.styling), + uiLibrary: normalizeTechStackValue(stack.uiLibrary), language: normalizeTechStackValue(stack.language), }; } diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts index bafa87e..0ed8464 100644 --- a/src/wizard/spec-generator.ts +++ b/src/wizard/spec-generator.ts @@ -193,7 +193,14 @@ export function generateAgentsMd(answers: WizardAnswers): string { * Check if tech stack has any values */ function hasTechStack(stack: TechStack): boolean { - return !!(stack.frontend || stack.backend || stack.database || stack.styling || stack.language); + return !!( + stack.frontend || + stack.backend || + stack.database || + stack.styling || + stack.uiLibrary || + stack.language + ); } /** diff --git a/src/wizard/types.ts b/src/wizard/types.ts index 56395ab..dd5a8f0 100644 --- a/src/wizard/types.ts +++ b/src/wizard/types.ts @@ -8,6 +8,7 @@ export interface TechStack { backend?: string; database?: string; styling?: string; + uiLibrary?: string; language?: string; } diff --git a/src/wizard/ui.ts b/src/wizard/ui.ts index af28586..d39f733 100644 --- a/src/wizard/ui.ts +++ b/src/wizard/ui.ts @@ -64,6 +64,7 @@ export function showRefinedSummary( backend?: string; database?: string; styling?: string; + 
uiLibrary?: string; language?: string; }, features: string[], @@ -89,6 +90,11 @@ export function showRefinedSummary( css: 'CSS', scss: 'SCSS', 'styled-components': 'styled-components', + shadcn: 'shadcn/ui', + 'shadcn-vue': 'shadcn-vue', + 'shadcn-svelte': 'shadcn-svelte', + mui: 'Material UI', + chakra: 'Chakra UI', typescript: 'TypeScript', javascript: 'JavaScript', }; @@ -103,7 +109,14 @@ export function showRefinedSummary( console.log(` ${chalk.white('Type:')} ${projectType}`); console.log(); - if (stack.frontend || stack.backend || stack.database || stack.styling || stack.language) { + if ( + stack.frontend || + stack.backend || + stack.database || + stack.styling || + stack.uiLibrary || + stack.language + ) { console.log(` ${chalk.white('Tech Stack:')}`); if (stack.frontend) console.log(` ${chalk.dim('Frontend:')} ${formatTechLabel(stack.frontend)}`); @@ -113,6 +126,8 @@ export function showRefinedSummary( console.log(` ${chalk.dim('Database:')} ${formatTechLabel(stack.database)}`); if (stack.styling) console.log(` ${chalk.dim('Styling:')} ${formatTechLabel(stack.styling)}`); + if (stack.uiLibrary) + console.log(` ${chalk.dim('UI Library:')} ${formatTechLabel(stack.uiLibrary)}`); if (stack.language) console.log(` ${chalk.dim('Language:')} ${formatTechLabel(stack.language)}`); console.log(); From 207cbc47b6c31752ae075802fa37b66b7492181f Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:51:21 +0000 Subject: [PATCH 42/46] feat(wizard): default to shadcn + tailwind + motion-primitives for web projects When no UI library/styling is specified, web projects now default to: - Tailwind CSS for styling - shadcn/ui (React/Next.js), shadcn-vue (Vue), or shadcn-svelte (Svelte) Updated REFINEMENT_PROMPT to include uiLibrary field and guidance for the LLM to suggest this default stack. Template fallback also sets these defaults when the LLM is unavailable. 
Co-Authored-By: Claude Opus 4.6 --- src/wizard/llm.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/wizard/llm.ts b/src/wizard/llm.ts index b6de7ad..1b22f72 100644 --- a/src/wizard/llm.ts +++ b/src/wizard/llm.ts @@ -106,6 +106,7 @@ Return ONLY valid JSON (no markdown, no explanation) in this exact format: "backend": "nodejs|express|fastify|hono|python|django|flask|fastapi|go|gin|rust|null", "database": "sqlite|postgres|mysql|mongodb|redis|supabase|firebase|prisma|drizzle|null", "styling": "tailwind|css|scss|styled-components|null", + "uiLibrary": "shadcn|shadcn-vue|shadcn-svelte|mui|chakra|null", "language": "typescript|javascript|python|go|rust" }, "coreFeatures": ["feature1", "feature2", "feature3"], @@ -124,6 +125,7 @@ Guidelines: - Language: TypeScript, Python, Go, Rust, etc. - NEVER substitute a user-specified technology with a different one - Only suggest defaults when the user doesn't specify (e.g., TypeScript + React for unspecified web apps) +- For web projects, default to Tailwind CSS + shadcn/ui (or framework variant) + motion-primitives unless the user explicitly specifies different styling/UI libraries. Use shadcn for React/Next.js, shadcn-vue for Vue, shadcn-svelte for Svelte. - coreFeatures are essential features implied by the idea - suggestedFeatures are nice-to-haves that would enhance the project - estimatedComplexity is based on scope (prototype=hours, mvp=day, full=days/weeks)`; @@ -456,6 +458,19 @@ function getTemplateSuggestions(idea: string): RefinedIdea { detectedBackend || (detectedFrontend === 'astro' ? undefined : 'nodejs'); suggestedStack.database = detectedDatabase || (detectedFrontend === 'astro' ? 
undefined : 'sqlite'); + + // Default UI stack: Tailwind + shadcn (framework-appropriate variant) + motion-primitives + if (!detectedStyling) { + suggestedStack.styling = 'tailwind'; + const frontend = suggestedStack.frontend; + if (frontend === 'vue') { + suggestedStack.uiLibrary = 'shadcn-vue'; + } else if (frontend === 'svelte') { + suggestedStack.uiLibrary = 'shadcn-svelte'; + } else if (frontend && frontend !== 'vanilla' && frontend !== 'astro') { + suggestedStack.uiLibrary = 'shadcn'; + } + } } else if (projectType === 'api') { suggestedStack.backend = detectedBackend || 'nodejs'; suggestedStack.database = detectedDatabase || 'postgres'; From 5b04fbbe9968b9573b920ba19d60eb837adde0f7 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:52:14 +0000 Subject: [PATCH 43/46] feat(wizard): rich spec + AGENTS.md generation with UI stack details - Add uiLibrary to spec display (A4) - Add Tailwind v4 setup instructions to AGENTS.md including cascade layers warning and explicit "no manual CSS resets" guidance (B1) - Add shadcn/ui + motion-primitives setup instructions to AGENTS.md (B1) - Add Setup Notes section to spec with Tailwind v4 + UI library details to prevent CSS cascade conflicts (B2) - Add formatTech entries for shadcn, MUI, Chakra, motion-primitives Co-Authored-By: Claude Opus 4.6 --- src/wizard/spec-generator.ts | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts index 0ed8464..a67705d 100644 --- a/src/wizard/spec-generator.ts +++ b/src/wizard/spec-generator.ts @@ -40,10 +40,35 @@ export function generateSpec(answers: WizardAnswers): string { if (answers.techStack.styling) { sections.push(`- **Styling:** ${formatTech(answers.techStack.styling)}`); } + if (answers.techStack.uiLibrary) { + sections.push(`- **UI Library:** ${formatTech(answers.techStack.uiLibrary)}`); + } if (answers.techStack.language) { sections.push(`- **Language:** 
${formatTech(answers.techStack.language)}`); } sections.push(''); + + // Technical setup notes (prevents common pitfalls like CSS cascade conflicts) + if (answers.techStack.styling === 'tailwind') { + sections.push('### Setup Notes'); + sections.push(''); + sections.push( + '- Use Tailwind CSS v4 with `@import "tailwindcss"` — do NOT use v3 `@tailwind` directives' + ); + sections.push( + '- Do NOT add manual CSS resets — Tailwind v4 preflight handles `box-sizing`, margin/padding resets' + ); + sections.push( + '- Custom CSS must be wrapped in `@layer base { }` or `@layer components { }` to avoid overriding Tailwind utilities' + ); + if (answers.techStack.uiLibrary) { + sections.push( + `- Use ${formatTech(answers.techStack.uiLibrary)} components — install and add components as needed` + ); + sections.push('- Use motion-primitives for page transitions and micro-interactions'); + } + sections.push(''); + } } // Features @@ -150,6 +175,45 @@ export function generateAgentsMd(answers: WizardAnswers): string { sections.push('- Use Prisma for database access'); } + // Styling-specific instructions + if (answers.techStack.styling === 'tailwind') { + sections.push(''); + sections.push('### Tailwind CSS v4 Setup (CRITICAL)'); + sections.push(''); + sections.push('- Install: `npm install tailwindcss @tailwindcss/postcss postcss`'); + sections.push("- postcss.config.js: `plugins: { '@tailwindcss/postcss': {} }`"); + sections.push( + '- CSS entry: `@import "tailwindcss";` (NOT `@tailwind base/components/utilities`)' + ); + sections.push('- Do NOT create tailwind.config.js (v4 uses CSS-based config)'); + sections.push( + '- Do NOT add a manual CSS reset (`* { margin: 0; padding: 0; }`) — Tailwind v4 preflight handles this' + ); + sections.push( + '- If you need custom base styles, wrap them in `@layer base { }` — unlayered CSS overrides ALL Tailwind utilities' + ); + } + + // UI Library setup + const uiLib = answers.techStack.uiLibrary; + if (uiLib === 'shadcn' || uiLib === 
'shadcn-vue' || uiLib === 'shadcn-svelte') { + sections.push(''); + sections.push('### UI Components'); + sections.push(''); + if (uiLib === 'shadcn') { + sections.push('- Use shadcn/ui for UI components: `npx shadcn@latest init`'); + sections.push('- Add components as needed: `npx shadcn@latest add button card dialog`'); + } else if (uiLib === 'shadcn-vue') { + sections.push('- Use shadcn-vue for UI components'); + } else if (uiLib === 'shadcn-svelte') { + sections.push('- Use shadcn-svelte for UI components'); + } + sections.push('- Use motion-primitives for animations'); + sections.push( + '- Follow the component patterns from shadcn — composable, accessible, customizable' + ); + } + sections.push(''); sections.push('## Validation Commands'); sections.push(''); @@ -226,6 +290,12 @@ function formatTech(tech: string): string { css: 'CSS', scss: 'SCSS', 'styled-components': 'styled-components', + shadcn: 'shadcn/ui', + 'shadcn-vue': 'shadcn-vue', + 'shadcn-svelte': 'shadcn-svelte', + mui: 'Material UI', + chakra: 'Chakra UI', + 'motion-primitives': 'motion-primitives', typescript: 'TypeScript', javascript: 'JavaScript', }; From 1cd2797d7148cd299606e633ec932539c821700c Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:53:05 +0000 Subject: [PATCH 44/46] feat(fix): improve --design loop with DESIGN_VERIFIED token + cascade check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - C1: Require DESIGN_VERIFIED completion token for design mode, disable legacy "All tasks completed" markers via requireExitSignal - C2: Update Phases 4-5 to instruct agent to emit DESIGN_VERIFIED only after taking verification screenshots - C3: Increase default design iterations from 5 to 7 for fix+verify cycles - C4: Add CSS cascade conflict check as priority 0 in Phase 2 — detects the "spacing broken + colors working" pattern caused by unlayered CSS overriding Tailwind v4 @layer utilities Co-Authored-By: Claude Opus 4.6 --- 
src/commands/fix.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/commands/fix.ts b/src/commands/fix.ts index 3b3bd35..65bb0b0 100644 --- a/src/commands/fix.ts +++ b/src/commands/fix.ts @@ -233,6 +233,7 @@ IMPORTANT: Your VERY FIRST action must be to start the dev server and take scree Look at the screenshots and identify CONCRETE issues you can actually see. Do NOT list generic improvements — only list problems visible in the screenshots. Check in this priority order: +0. **CSS cascade conflicts** — If spacing/margin/padding from Tailwind classes aren't working but colors/fonts/grid work fine, check the main CSS file (e.g. index.css, globals.css) for unlayered rules like \`* { margin: 0; padding: 0; }\` that override Tailwind's @layer-based utilities. Remove any such rules — Tailwind v4's preflight already provides proper resets. 1. **Page structure** — Is content centered? Are sections contained in a max-width wrapper? Is anything stuck to the left/right edge when it shouldn't be? Are there huge empty gaps between sections? 2. **Layout & positioning** — Are grid/flex layouts rendering correctly? Are columns balanced? Is the hero section properly structured? Are elements overlapping or misaligned? 3. **Responsive issues** — Does the layout break at any viewport? Do elements overflow or get clipped? @@ -253,9 +254,13 @@ Prioritize: page structure > layout positioning > responsive > spacing > cosmeti 1. Fix structural issues FIRST (containers, centering, grid layout), then work down to cosmetic 2. After fixing each structural issue, re-screenshot to verify the layout improved 3. Final verification: screenshot all 3 viewports and confirm the page looks properly structured +4. CRITICAL: After confirming fixes look correct in final screenshots, output DESIGN_VERIFIED on its own line. Do NOT output this until you have taken verification screenshots and confirmed the design is correct. 
## Phase 5: Cleanup -CRITICAL: Stop the dev server (kill the process) when done — do NOT leave it running. +1. Stop the dev server (kill the process) when done — do NOT leave it running +2. If you have NOT already output DESIGN_VERIFIED, do it now after visual confirmation + +IMPORTANT: The loop will NOT accept completion without the exact token DESIGN_VERIFIED. Do NOT say "All tasks completed" — it will be ignored. ${customTask ? `\nUser notes: ${customTask}\n` : ''}${specContext ? `\n## Original Design Specification\n${specContext}` : ''}${feedback ? `\n\n## Build Errors (also fix these)\n${feedback}` : ''}`; } else if (isDesignTask) { @@ -273,7 +278,7 @@ This is a visual/design task. After making your CSS and styling changes, you MUS // that triggers excessive skill searches from the design prompt boilerplate await autoInstallSkillsFromTask(customTask || (options.design ? 'design fix' : 'fix'), cwd); - const defaultIter = options.design ? 5 : isDesignTask ? 4 : 3; + const defaultIter = options.design ? 7 : isDesignTask ? 4 : 3; const maxIter = options.maxIterations ? Number.parseInt(options.maxIterations, 10) : defaultIter; const result = await runLoop({ @@ -290,6 +295,11 @@ This is a visual/design task. After making your CSS and styling changes, you MUS maxSkills: options.design ? 4 : undefined, skipPlanInstructions: options.design, fixMode: options.design ? 'design' : customTask ? 
'custom' : 'scan', + // Design mode: require explicit DESIGN_VERIFIED token after visual verification + ...(options.design && { + completionPromise: 'DESIGN_VERIFIED', + requireExitSignal: true, + }), }); // --- Step 5: Verify fix by re-running validations --- From 9f4b1faaf0a4371ea1d6420a605773c7e841820d Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:55:23 +0000 Subject: [PATCH 45/46] feat(loop): design mode stall detection + conditional completion instruction - D1: Credit screenshot/viewport activity as productive progress in design mode, preventing stall detector from killing analysis iterations - D2: Suppress "All tasks completed" instruction for design mode (skipPlanInstructions=true), replacing with "Follow the completion instructions in the task below" to avoid conflicting with DESIGN_VERIFIED Co-Authored-By: Claude Opus 4.6 --- src/loop/context-builder.ts | 2 +- src/loop/executor.ts | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/loop/context-builder.ts b/src/loop/context-builder.ts index 423f878..30c0627 100644 --- a/src/loop/context-builder.ts +++ b/src/loop/context-builder.ts @@ -228,7 +228,7 @@ ${planRules} - Implement completely — no placeholders or stubs - Create files before importing them — never import components or modules that don't exist yet - Do NOT run build or dev server commands yourself — the loop automatically runs lint checks between iterations and a full build on the final iteration. NEVER start a dev server (\`npm run dev\`, \`npx vite\`, etc.) — it blocks forever and wastes resources. (Exception: if explicitly told to do visual verification, you may briefly start a dev server and MUST kill it when done.) -- When ALL tasks are complete, explicitly state "All tasks completed" +${skipPlanInstructions ? 
'- Follow the completion instructions in the task below' : '- When ALL tasks are complete, explicitly state "All tasks completed"'} - If you learn how to run/build the project, update AGENTS.md Technology gotchas (CRITICAL — follow these exactly): diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 72c93ae..dcf79cd 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -900,8 +900,13 @@ export async function runLoop(options: LoopOptions): Promise { const tasksProgressedThisIteration = postIterationTaskInfo.completed > previousCompletedTasks; // Build/validation failures are NOT idle — agent is actively debugging const hadValidationFailure = !!lastValidationFeedback; + // Design mode: screenshot analysis is productive even without file changes + const outputLower = result.output.toLowerCase(); + const hasDesignActivity = + options.fixMode === 'design' && + (outputLower.includes('screenshot') || outputLower.includes('viewport')); const hasProductiveProgress = - hasChanges || tasksProgressedThisIteration || hadValidationFailure; + hasChanges || tasksProgressedThisIteration || hadValidationFailure || hasDesignActivity; if (!hasProductiveProgress) { consecutiveIdleIterations++; From 5d65a00d362e2ea2b7eb1b6192953ff1f2bff90d Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Sat, 14 Feb 2026 00:59:05 +0000 Subject: [PATCH 46/46] docs: add --design flag, UI defaults, and changelog for beta.17 Co-Authored-By: Claude Opus 4.6 --- docs/docs/cli/fix.md | 68 ++++++++++++++++++++++++++++---- docs/docs/community/changelog.md | 18 +++++++++ docs/docs/wizard/overview.md | 12 ++++-- 3 files changed, 86 insertions(+), 12 deletions(-) diff --git a/docs/docs/cli/fix.md b/docs/docs/cli/fix.md index aa84586..21cec2a 100644 --- a/docs/docs/cli/fix.md +++ b/docs/docs/cli/fix.md @@ -2,7 +2,7 @@ sidebar_position: 2 title: fix description: Fix build errors, lint issues, or design problems -keywords: [cli, fix, command, build errors, lint, design] +keywords: [cli, fix, 
command, build errors, lint, design, visual, css] --- # ralph-starter fix @@ -22,7 +22,9 @@ The `fix` command runs a focused AI loop to fix project issues. It scans for bui When given a custom task describing a visual or design problem (e.g., "fix the paddings and make the colors brighter"), the fix command detects CSS/design keywords and: - Auto-applies installed design skills (frontend-design, ui-ux-designer, etc.) -- Instructs the agent to visually verify changes using the `/web-design-reviewer` skill with browser screenshots +- Instructs the agent to visually verify changes using browser screenshots + +For structured visual fix passes, use the `--design` flag — see [Design Mode](#design-mode) below. ## Arguments @@ -35,9 +37,10 @@ When given a custom task describing a visual or design problem (e.g., "fix the p | Option | Description | Default | |--------|-------------|---------| | `--scan` | Force full project scan (build + lint + typecheck + tests) | false | +| `--design` | Structured visual fix mode with screenshot verification | false | | `--agent ` | Specify agent (claude-code, cursor, codex, opencode) | auto-detect | | `--commit` | Auto-commit the fix | false | -| `--max-iterations ` | Maximum fix iterations | 3 | +| `--max-iterations ` | Maximum fix iterations | 3 (scan), 4 (design keywords), 7 (--design) | | `--output-dir ` | Project directory | cwd | ## Examples @@ -55,14 +58,17 @@ ralph-starter fix --scan ### Fix Design Issues ```bash -# Fix visual/CSS problems +# Structured visual fix pass (recommended for design work) +ralph-starter fix --design + +# Design mode with specific notes +ralph-starter fix --design "the hero section spacing is off and colors are too muted" + +# Ad-hoc CSS/design fix (auto-detected as design task) ralph-starter fix "fix the paddings and make the colors brighter" # Fix responsive layout ralph-starter fix "make the layout responsive on mobile" - -# Fix color theme -ralph-starter fix "change the color scheme to darker tones" 
``` ### With Options @@ -76,6 +82,9 @@ ralph-starter fix "fix lint errors" --agent claude-code # Allow more iterations for complex fixes ralph-starter fix "fix all test failures" --max-iterations 5 + +# Design fix with more room to iterate +ralph-starter fix --design --max-iterations 10 ``` ## Behavior @@ -91,7 +100,7 @@ ralph-starter fix "fix all test failures" --max-iterations 5 - Searches skills.sh for complementary skills if needed 3. **Fix Loop**: - - Agent works on fixing issues (default: 3 iterations) + - Agent works on fixing issues (default: 3 iterations for scan, 7 for `--design`) - Lint checks run between iterations (fast feedback) - Full build check runs on final iteration - If build fails on final iteration → extends loop by 2 extra iterations @@ -100,6 +109,49 @@ ralph-starter fix "fix all test failures" --max-iterations 5 - Re-runs original validation commands after the loop - Reports success only if all checks pass (not just agent completion) +## Design Mode + +The `--design` flag enables a structured visual fix workflow specifically designed for CSS, layout, and styling issues. It runs the agent through a 5-phase process: + +### Phase 1: Visual Audit + +The agent's **first action** is to start the dev server and take screenshots at 3 viewports: +- Desktop (1440px) +- Tablet (768px) +- Mobile (375px) + +### Phase 2: Issue Identification + +The agent analyzes screenshots against the project spec and checks for issues in priority order: + +0. **CSS cascade conflicts** — Detects unlayered CSS resets (e.g., `* { margin: 0; padding: 0; }`) that silently override Tailwind v4 utilities. This is the most common cause of "classes are correct but nothing works." +1. **Page structure** — Content centering, max-width wrappers, empty gaps +2. **Layout & positioning** — Grid/flex rendering, column balance, overlaps +3. **Responsive issues** — Viewport breakage, overflow, clipping +4. **Spacing** — Vertical rhythm, abnormal gaps +5. 
**Typography & colors** — Font loading, readability, consistency + +### Phase 3: Fix Plan + +The agent creates a `DESIGN_FIX_PLAN.md` with specific issues, exact files, and CSS properties to change. + +### Phase 4: Execute & Verify + +Fixes are applied in priority order (structural first, cosmetic last). The agent re-screenshots after each structural fix to verify improvement. + +### Phase 5: Cleanup & Completion + +The agent stops the dev server it started for screenshots so it is not left running. The loop requires the agent to output `DESIGN_VERIFIED` after taking final verification screenshots. The loop will **not** accept generic completion signals like "All tasks completed" — only `DESIGN_VERIFIED` after visual confirmation. + +### Why Design Mode Exists + +Without `--design`, agents often: +- Read code and see "correct" Tailwind classes, then declare victory without visual verification +- Add more CSS classes on top of cascade conflicts instead of fixing the root cause +- Complete in 1 iteration without actually verifying the visual result + +Design mode forces visual-first debugging and prevents premature exit. + ## Exit Codes | Code | Description | diff --git a/docs/docs/community/changelog.md b/docs/docs/community/changelog.md index ec02e82..7e48b50 100644 --- a/docs/docs/community/changelog.md +++ b/docs/docs/community/changelog.md @@ -11,6 +11,24 @@ All notable changes to ralph-starter are documented here.
This project follows [ --- +## [0.1.1-beta.17] - 2026-02-14 + +### Added +- **`fix --design` mode**: Structured 5-phase visual fix workflow with screenshot verification, CSS cascade conflict detection, and `DESIGN_VERIFIED` completion token +- **Smart UI defaults**: Web projects now default to Tailwind CSS + shadcn/ui + motion-primitives when no styling is specified (framework-aware: shadcn-vue for Vue, shadcn-svelte for Svelte) +- **`uiLibrary` field** in TechStack for explicit UI component library selection +- **Rich spec generation**: Specs and AGENTS.md now include Tailwind v4 setup notes, CSS cascade layer warnings, and shadcn component setup instructions + +### Fixed +- Design loop premature exit — `fix --design` now requires explicit `DESIGN_VERIFIED` token after visual confirmation (prevents 1-iteration false completions) +- Design loop stall detection — screenshot/viewport analysis no longer falsely triggers idle detection +- Default design iterations increased from 5 to 7 for more thorough visual fixes + +### Changed +- Completion instruction in agent preamble is now conditional — design mode uses task-specific completion flow instead of generic "All tasks completed" + +--- + ## [0.1.1-beta.16] - 2026-02-07 ### Added diff --git a/docs/docs/wizard/overview.md b/docs/docs/wizard/overview.md index 7406dab..ba1070e 100644 --- a/docs/docs/wizard/overview.md +++ b/docs/docs/wizard/overview.md @@ -46,10 +46,12 @@ Once you describe your idea (e.g., "a habit tracker app"), the AI: You can then customize: - **Project Type** - Web, API, CLI, Mobile, Library, Automation -- **Tech Stack** - Frontend, backend, database choices +- **Tech Stack** - Frontend, backend, database, styling, UI library choices - **Features** - Select which features to include - **Complexity** - Prototype, MVP, or Full-featured +**Smart UI defaults**: For web projects, if you don't specify a styling or UI library, ralph-starter defaults to **Tailwind CSS + shadcn/ui + motion-primitives**. 
The shadcn variant is framework-aware: shadcn/ui for React/Next.js, shadcn-vue for Vue, and shadcn-svelte for Svelte. These defaults produce richer specs with Tailwind v4 setup notes that prevent common CSS cascade conflicts. + ### Step 4: Execution Options Choose how to proceed: @@ -95,9 +97,11 @@ $ ralph-starter Type: Web Application Tech Stack: - Frontend: react - Backend: nodejs - Database: sqlite + Frontend: React + Backend: Node.js + Database: SQLite + Styling: Tailwind CSS + UI Library: shadcn/ui Key Features: • Transaction tracking