From 7aea4a25c17dc10de17b3b2f75db5cdcab0891a9 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 12 Feb 2026 14:10:31 +0000 Subject: [PATCH] fix(loop): improve validation for greenfield builds - Reset circuit breaker when tasks advance (prevents false positives during multi-task greenfield builds where early tasks can't pass tests) - Add package manager detection: auto-detect pnpm/yarn/bun from lockfiles and packageManager field instead of hardcoding npm - Add validation warm-up: skip validation until enough tasks are done for greenfield builds (auto-detected, configurable via --validation-warmup) Co-Authored-By: Claude Opus 4.6 --- src/cli.ts | 4 + src/commands/init.ts | 16 +++- src/commands/run.ts | 30 ++++--- src/loop/__tests__/validation.test.ts | 2 +- src/loop/executor.ts | 9 +- src/loop/validation.ts | 14 ++- src/utils/__tests__/package-manager.test.ts | 94 +++++++++++++++++++++ src/utils/package-manager.ts | 61 +++++++++++++ src/wizard/spec-generator.ts | 9 +- 9 files changed, 215 insertions(+), 24 deletions(-) create mode 100644 src/utils/__tests__/package-manager.test.ts create mode 100644 src/utils/package-manager.ts diff --git a/src/cli.ts b/src/cli.ts index f5bac3a..8696969 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -82,6 +82,10 @@ program .option('--no-track-cost', 'Disable cost tracking') .option('--circuit-breaker-failures ', 'Max consecutive failures before stopping (default: 3)') .option('--circuit-breaker-errors ', 'Max same error occurrences before stopping (default: 5)') + .option( + '--validation-warmup ', + 'Skip validation until N tasks are completed (auto-detected for greenfield builds)' + ) .option( '--context-budget ', 'Max input tokens per iteration for smart context trimming (0 = unlimited)' diff --git a/src/commands/init.ts b/src/commands/init.ts index ec3754c..4e202dd 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -6,6 +6,11 @@ import ora from 'ora'; import YAML from 'yaml'; import { initGitRepo, isGitRepo } from '../automation/git.js'; import { type Agent, detectAvailableAgents, printAgentStatus } from '../loop/agents.js'; +import { + detectPackageManager, + formatRunCommand, + type PackageManager, +} from '../utils/package-manager.js'; interface InitOptions { name?: string; @@ -18,6 +23,7 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'unknown'; export interface ProjectInfo { type: ProjectType; name: string; + packageManager?: PackageManager; testCmd?: string; buildCmd?: string; lintCmd?: string; @@ -29,12 +35,14 @@ export function detectProject(cwd: string): ProjectInfo { try { const pkg = JSON.parse(readFileSync(join(cwd, 'package.json'), 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); return { type: 'nodejs', name: pkg.name || 'project', - testCmd: scripts.test ? 'npm test' : undefined, - buildCmd: scripts.build ? 'npm run build' : undefined, - lintCmd: scripts.lint ? 'npm run lint' : undefined, + packageManager: pm, + testCmd: scripts.test ? formatRunCommand(pm, 'test') : undefined, + buildCmd: scripts.build ? formatRunCommand(pm, 'build') : undefined, + lintCmd: scripts.lint ? formatRunCommand(pm, 'lint') : undefined, }; } catch { return { type: 'nodejs', name: 'project' }; @@ -150,7 +158,7 @@ ${validationCmds.length > 0 ? validationCmds.join('\n') : '# Add your test/build ## Build Instructions -${project.type === 'nodejs' ? '1. Run `npm install` to install dependencies\n2. Run `npm run build` to build (if applicable)\n3. Run `npm test` to verify' : ''} +${project.type === 'nodejs' ? `1. Run \`${project.packageManager || 'npm'} install\` to install dependencies\n2. Run \`${project.buildCmd || `${project.packageManager || 'npm'} run build`}\` to build (if applicable)\n3. Run \`${project.testCmd || `${project.packageManager || 'npm'} test`}\` to verify` : ''} ${project.type === 'python' ? '1. Create virtual environment: `python -m venv venv`\n2. Install dependencies: `pip install -e .`\n3. Run tests: `pytest`' : ''} ${project.type === 'rust' ? '1. Run `cargo build` to compile\n2. Run `cargo test` to verify' : ''} ${project.type === 'go' ? '1. Run `go mod tidy` to sync dependencies\n2. Run `go build ./...` to compile\n3. Run `go test ./...` to verify' : ''} diff --git a/src/commands/run.ts b/src/commands/run.ts index 58ed02e..384eb0f 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -20,6 +20,7 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde import { autoInstallSkillsFromTask } from '../skills/auto-install.js'; import { getSourceDefaults } from '../sources/config.js'; import { fetchFromSource } from '../sources/index.js'; +import { detectPackageManager, formatRunCommand, getRunCommand } from '../utils/package-manager.js'; /** Default fallback repo for GitHub issues when no project is specified */ const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas'; @@ -42,19 +43,14 @@ function detectRunCommand( try { const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); // Priority order for dev commands - if (scripts.dev) { - return { command: 'npm', args: ['run', 'dev'], description: 'npm run dev' }; - } - if (scripts.start) { - return { command: 'npm', args: ['run', 'start'], description: 'npm run start' }; - } - if (scripts.serve) { - return { command: 'npm', args: ['run', 'serve'], description: 'npm run serve' }; - } - if (scripts.preview) { - return { command: 'npm', args: ['run', 'preview'], description: 'npm run preview' }; + for (const script of ['dev', 'start', 'serve', 'preview']) { + if (scripts[script]) { + const cmd = getRunCommand(pm, script); + return { ...cmd, description: formatRunCommand(pm, script) }; + } } } catch { // Ignore parse errors @@ -223,6 +219,7 @@ export interface RunCommandOptions { circuitBreakerFailures?: number; circuitBreakerErrors?: number; contextBudget?: number; + validationWarmup?: number; // Figma options figmaMode?: 'spec' | 'tokens' | 'components' | 'assets' | 'content'; figmaFramework?: 'react' | 'vue' | 'svelte' | 'astro' | 'nextjs' | 'nuxt' | 'html'; @@ -573,6 +570,16 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN console.log(chalk.dim(`Max iterations: ${smartIterations} (${reason})`)); } + // Auto-detect greenfield builds: skip validation until enough tasks are done + const isGreenfield = taskCount.total > 0 && taskCount.completed === 0; + const autoWarmup = isGreenfield ? Math.max(2, Math.floor(taskCount.total * 0.5)) : 0; + const validationWarmup = options.validationWarmup ? Number(options.validationWarmup) : autoWarmup; + if (validationWarmup > 0 && options.validate) { + console.log( + chalk.dim(`Validation warm-up: skipping until ${validationWarmup} tasks completed`) + ); + } + // Apply preset values with CLI overrides const loopOptions: LoopOptions = { task: preset?.promptPrefix ? `${preset.promptPrefix}\n\n${finalTask}` : finalTask, @@ -587,6 +594,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN prIssueRef: sourceIssueRef, prLabels: options.auto ? ['AUTO'] : undefined, validate: options.validate ?? preset?.validate, + validationWarmup, sourceType: options.from?.toLowerCase(), // New options completionPromise: options.completionPromise ?? preset?.completionPromise, diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts index f46a90c..66baa91 100644 --- a/src/loop/__tests__/validation.test.ts +++ b/src/loop/__tests__/validation.test.ts @@ -89,7 +89,7 @@ describe('validation', () => { expect(commands.find((c) => c.name === 'test')).toEqual({ name: 'test', command: 'npm', - args: ['run', 'test'], + args: ['test'], }); }); diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 6d35e63..102f69d 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -190,6 +190,7 @@ export interface LoopOptions { trackCost?: boolean; // Track token usage and cost model?: string; // Model name for cost estimation contextBudget?: number; // Max input tokens per iteration (0 = unlimited) + validationWarmup?: number; // Skip validation until N tasks completed (for greenfield builds) } export interface LoopResult { @@ -569,6 +570,9 @@ export async function runLoop(options: LoopOptions): Promise { // Check if tasks were completed since last iteration const newlyCompleted = completedTasks - previousCompletedTasks; if (newlyCompleted > 0 && i > 1) { + // Task completion is forward progress — reset circuit breaker consecutive failures + circuitBreaker.recordSuccess(); + // Get names of newly completed tasks (strip markdown) const maxNameWidth = Math.max(30, getTerminalWidth() - 30); const completedNames = taskInfo.tasks @@ -798,10 +802,13 @@ export async function runLoop(options: LoopOptions): Promise { } // Run validation (backpressure) if enabled and there are changes + // Skip validation during warm-up period (greenfield builds where early tasks can't pass tests) let _validationPassed = true; let validationResults: ValidationResult[] = []; + const warmupThreshold = options.validationWarmup ?? 0; + const pastWarmup = completedTasks >= warmupThreshold; - if (validationCommands.length > 0 && (await hasUncommittedChanges(options.cwd))) { + if (validationCommands.length > 0 && pastWarmup && (await hasUncommittedChanges(options.cwd))) { spinner.start(chalk.yellow(`Loop ${i}: Running validation...`)); validationResults = await runAllValidations(options.cwd, validationCommands); diff --git a/src/loop/validation.ts b/src/loop/validation.ts index 0f3962c..afef26e 100644 --- a/src/loop/validation.ts +++ b/src/loop/validation.ts @@ -1,6 +1,7 @@ import { existsSync, readFileSync } from 'node:fs'; import { join } from 'node:path'; import { execa } from 'execa'; +import { detectPackageManager, getRunCommand } from '../utils/package-manager.js'; export interface ValidationCommand { name: string; @@ -68,18 +69,23 @@ export function detectValidationCommands(cwd: string): ValidationCommand[] { try { const pkg = JSON.parse(readFileSync(packagePath, 'utf-8')); const scripts = pkg.scripts || {}; + const pm = detectPackageManager(cwd); if (scripts.test && scripts.test !== 'echo "Error: no test specified" && exit 1') { - commands.push({ name: 'test', command: 'npm', args: ['run', 'test'] }); + const cmd = getRunCommand(pm, 'test'); + commands.push({ name: 'test', ...cmd }); } if (scripts.lint) { - commands.push({ name: 'lint', command: 'npm', args: ['run', 'lint'] }); + const cmd = getRunCommand(pm, 'lint'); + commands.push({ name: 'lint', ...cmd }); } if (scripts.build) { - commands.push({ name: 'build', command: 'npm', args: ['run', 'build'] }); + const cmd = getRunCommand(pm, 'build'); + commands.push({ name: 'build', ...cmd }); } if (scripts.typecheck) { - commands.push({ name: 'typecheck', command: 'npm', args: ['run', 'typecheck'] }); + const cmd = getRunCommand(pm, 'typecheck'); + commands.push({ name: 'typecheck', ...cmd }); } } catch { // Invalid package.json diff --git a/src/utils/__tests__/package-manager.test.ts b/src/utils/__tests__/package-manager.test.ts new file mode 100644 index 0000000..0ceff38 --- /dev/null +++ b/src/utils/__tests__/package-manager.test.ts @@ -0,0 +1,94 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { detectPackageManager, formatRunCommand, getRunCommand } from '../package-manager.js'; + +vi.mock('node:fs', () => ({ + existsSync: vi.fn(), + readFileSync: vi.fn(), +})); + +const mockExistsSync = vi.mocked(existsSync); +const mockReadFileSync = vi.mocked(readFileSync); + +describe('detectPackageManager', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return pnpm when pnpm-lock.yaml exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('pnpm-lock.yaml')); + expect(detectPackageManager('/test')).toBe('pnpm'); + }); + + it('should return yarn when yarn.lock exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('yarn.lock')); + expect(detectPackageManager('/test')).toBe('yarn'); + }); + + it('should return bun when bun.lockb exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lockb')); + expect(detectPackageManager('/test')).toBe('bun'); + }); + + it('should return bun when bun.lock exists', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lock')); + expect(detectPackageManager('/test')).toBe('bun'); + }); + + it('should read packageManager field from package.json', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('pnpm'); + }); + + it('should prefer lockfile over packageManager field', () => { + mockExistsSync.mockImplementation( + (p: any) => p.toString().includes('yarn.lock') || p.toString().includes('package.json') + ); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' })); + expect(detectPackageManager('/test')).toBe('yarn'); + }); + + it('should default to npm when no indicators found', () => { + mockExistsSync.mockReturnValue(false); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should default to npm for unrecognized packageManager', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'unknown@1.0.0' })); + expect(detectPackageManager('/test')).toBe('npm'); + }); + + it('should handle invalid package.json gracefully', () => { + mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json')); + mockReadFileSync.mockReturnValue('not valid json'); + expect(detectPackageManager('/test')).toBe('npm'); + }); +}); + +describe('getRunCommand', () => { + it('should return shorthand for test script', () => { + expect(getRunCommand('pnpm', 'test')).toEqual({ command: 'pnpm', args: ['test'] }); + expect(getRunCommand('npm', 'test')).toEqual({ command: 'npm', args: ['test'] }); + expect(getRunCommand('bun', 'test')).toEqual({ command: 'bun', args: ['test'] }); + }); + + it('should use run for non-test scripts', () => { + expect(getRunCommand('pnpm', 'build')).toEqual({ command: 'pnpm', args: ['run', 'build'] }); + expect(getRunCommand('npm', 'lint')).toEqual({ command: 'npm', args: ['run', 'lint'] }); + expect(getRunCommand('bun', 'dev')).toEqual({ command: 'bun', args: ['run', 'dev'] }); + }); +}); + +describe('formatRunCommand', () => { + it('should format test commands', () => { + expect(formatRunCommand('pnpm', 'test')).toBe('pnpm test'); + expect(formatRunCommand('npm', 'test')).toBe('npm test'); + }); + + it('should format run commands', () => { + expect(formatRunCommand('pnpm', 'build')).toBe('pnpm run build'); + expect(formatRunCommand('yarn', 'lint')).toBe('yarn run lint'); + }); +}); diff --git a/src/utils/package-manager.ts b/src/utils/package-manager.ts new file mode 100644 index 0000000..354b2bb --- /dev/null +++ b/src/utils/package-manager.ts @@ -0,0 +1,61 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +export type PackageManager = 'npm' | 'pnpm' | 'yarn' | 'bun'; + +/** + * Detect the package manager used in a project directory. + * + * Detection priority: + * 1. Lock file presence (most reliable — reflects actual usage) + * 2. packageManager field in package.json (explicit declaration) + * 3. Default: npm + */ +export function detectPackageManager(cwd: string): PackageManager { + // Check lock files first (most reliable indicator of actual usage) + if (existsSync(join(cwd, 'pnpm-lock.yaml'))) return 'pnpm'; + if (existsSync(join(cwd, 'yarn.lock'))) return 'yarn'; + if (existsSync(join(cwd, 'bun.lockb')) || existsSync(join(cwd, 'bun.lock'))) return 'bun'; + + // Check package.json packageManager field + const packageJsonPath = join(cwd, 'package.json'); + if (existsSync(packageJsonPath)) { + try { + const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8')); + if (pkg.packageManager) { + const name = pkg.packageManager.split('@')[0]; + if (['pnpm', 'yarn', 'bun'].includes(name)) { + return name as PackageManager; + } + } + } catch { + // Invalid package.json — fall through to default + } + } + + return 'npm'; +} + +/** + * Get the run command for a package manager script. + * For 'test', uses the shorthand (e.g., `pnpm test`). + * For other scripts, uses `run` (e.g., `pnpm run build`). + */ +export function getRunCommand( + pm: PackageManager, + script: string +): { command: string; args: string[] } { + if (script === 'test') { + return { command: pm, args: ['test'] }; + } + return { command: pm, args: ['run', script] }; +} + +/** + * Format a run command as a display string. + * e.g., "pnpm run build" or "bun test" + */ +export function formatRunCommand(pm: PackageManager, script: string): string { + const { command, args } = getRunCommand(pm, script); + return `${command} ${args.join(' ')}`; +} diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts index 66283b4..bafa87e 100644 --- a/src/wizard/spec-generator.ts +++ b/src/wizard/spec-generator.ts @@ -1,3 +1,4 @@ +import { detectPackageManager, formatRunCommand } from '../utils/package-manager.js'; import type { TechStack, WizardAnswers } from './types.js'; import { formatComplexity, formatProjectType } from './ui.js'; @@ -160,9 +161,11 @@ export function generateAgentsMd(answers: WizardAnswers): string { answers.techStack.backend === 'nodejs'; if (hasNodeStack) { - sections.push('- **lint**: `npm run lint`'); - sections.push('- **build**: `npm run build`'); - sections.push('- **test**: `npm test`'); + // Detect PM from working directory if available, default to npm for greenfield projects + const pm = answers.workingDirectory ? detectPackageManager(answers.workingDirectory) : 'npm'; + sections.push(`- **lint**: \`${formatRunCommand(pm, 'lint')}\``); + sections.push(`- **build**: \`${formatRunCommand(pm, 'build')}\``); + sections.push(`- **test**: \`${formatRunCommand(pm, 'test')}\``); } else if (answers.techStack.backend === 'python') { sections.push('- **lint**: `ruff check .`'); sections.push('- **test**: `pytest`');