From 7aea4a25c17dc10de17b3b2f75db5cdcab0891a9 Mon Sep 17 00:00:00 2001
From: ruben-cytonic <ruben@cytonic.com>
Date: Thu, 12 Feb 2026 14:10:31 +0000
Subject: [PATCH] fix(loop): improve validation for greenfield builds

- Reset circuit breaker when tasks advance (prevents false positives
  during multi-task greenfield builds where early tasks can't pass tests)
- Add package manager detection: auto-detect pnpm/yarn/bun from lockfiles
  and packageManager field instead of hardcoding npm
- Add validation warm-up: skip validation until enough tasks are done for
  greenfield builds (auto-detected, configurable via --validation-warmup)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/cli.ts                                  |  4 +
 src/commands/init.ts                        | 16 +++-
 src/commands/run.ts                         | 30 ++++---
 src/loop/__tests__/validation.test.ts       |  2 +-
 src/loop/executor.ts                        |  9 +-
 src/loop/validation.ts                      | 14 ++-
 src/utils/__tests__/package-manager.test.ts | 94 +++++++++++++++++++++
 src/utils/package-manager.ts                | 61 +++++++++++++
 src/wizard/spec-generator.ts                |  9 +-
 9 files changed, 215 insertions(+), 24 deletions(-)
 create mode 100644 src/utils/__tests__/package-manager.test.ts
 create mode 100644 src/utils/package-manager.ts
diff --git a/src/cli.ts b/src/cli.ts
index f5bac3a..8696969 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -82,6 +82,10 @@ program
   .option('--no-track-cost', 'Disable cost tracking')
   .option('--circuit-breaker-failures <n>', 'Max consecutive failures before stopping (default: 3)')
   .option('--circuit-breaker-errors <n>', 'Max same error occurrences before stopping (default: 5)')
+  .option(
+    '--validation-warmup <n>',
+    'Skip validation until N tasks are completed (auto-detected for greenfield builds)'
+  )
   .option(
     '--context-budget <n>',
     'Max input tokens per iteration for smart context trimming (0 = unlimited)'
diff --git a/src/commands/init.ts b/src/commands/init.ts
index ec3754c..4e202dd 100644
--- a/src/commands/init.ts
+++ b/src/commands/init.ts
@@ -6,6 +6,11 @@ import ora from 'ora';
 import YAML from 'yaml';
 import { initGitRepo, isGitRepo } from '../automation/git.js';
 import { type Agent, detectAvailableAgents, printAgentStatus } from '../loop/agents.js';
+import {
+  detectPackageManager,
+  formatRunCommand,
+  type PackageManager,
+} from '../utils/package-manager.js';
 
 interface InitOptions {
   name?: string;
@@ -18,6 +23,7 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'unknown';
 export interface ProjectInfo {
   type: ProjectType;
   name: string;
+  packageManager?: PackageManager;
   testCmd?: string;
   buildCmd?: string;
   lintCmd?: string;
@@ -29,12 +35,14 @@ export function detectProject(cwd: string): ProjectInfo {
     try {
       const pkg = JSON.parse(readFileSync(join(cwd, 'package.json'), 'utf-8'));
       const scripts = pkg.scripts || {};
+      const pm = detectPackageManager(cwd);
       return {
         type: 'nodejs',
         name: pkg.name || 'project',
-        testCmd: scripts.test ? 'npm test' : undefined,
-        buildCmd: scripts.build ? 'npm run build' : undefined,
-        lintCmd: scripts.lint ? 'npm run lint' : undefined,
+        packageManager: pm,
+        testCmd: scripts.test ? formatRunCommand(pm, 'test') : undefined,
+        buildCmd: scripts.build ? formatRunCommand(pm, 'build') : undefined,
+        lintCmd: scripts.lint ? formatRunCommand(pm, 'lint') : undefined,
       };
     } catch {
       return { type: 'nodejs', name: 'project' };
@@ -150,7 +158,7 @@ ${validationCmds.length > 0 ? validationCmds.join('\n') : '# Add your test/build
 
 ## Build Instructions
 
-${project.type === 'nodejs' ? '1. Run `npm install` to install dependencies\n2. Run `npm run build` to build (if applicable)\n3. Run `npm test` to verify' : ''}
+${project.type === 'nodejs' ? `1. Run \`${project.packageManager || 'npm'} install\` to install dependencies\n2. Run \`${project.buildCmd || `${project.packageManager || 'npm'} run build`}\` to build (if applicable)\n3. Run \`${project.testCmd || `${project.packageManager || 'npm'} test`}\` to verify` : ''}
 ${project.type === 'python' ? '1. Create virtual environment: `python -m venv venv`\n2. Install dependencies: `pip install -e .`\n3. Run tests: `pytest`' : ''}
 ${project.type === 'rust' ? '1. Run `cargo build` to compile\n2. Run `cargo test` to verify' : ''}
 ${project.type === 'go' ? '1. Run `go mod tidy` to sync dependencies\n2. Run `go build ./...` to compile\n3. Run `go test ./...` to verify' : ''}
diff --git a/src/commands/run.ts b/src/commands/run.ts
index 58ed02e..384eb0f 100644
--- a/src/commands/run.ts
+++ b/src/commands/run.ts
@@ -20,6 +20,7 @@ import { formatPresetsHelp, getPreset, type PresetConfig } from '../presets/inde
 import { autoInstallSkillsFromTask } from '../skills/auto-install.js';
 import { getSourceDefaults } from '../sources/config.js';
 import { fetchFromSource } from '../sources/index.js';
+import { detectPackageManager, formatRunCommand, getRunCommand } from '../utils/package-manager.js';
 
 /** Default fallback repo for GitHub issues when no project is specified */
 const DEFAULT_GITHUB_ISSUES_REPO = 'multivmlabs/ralph-ideas';
@@ -42,19 +43,14 @@ function detectRunCommand(
     try {
       const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
       const scripts = pkg.scripts || {};
+      const pm = detectPackageManager(cwd);
 
       // Priority order for dev commands
-      if (scripts.dev) {
-        return { command: 'npm', args: ['run', 'dev'], description: 'npm run dev' };
-      }
-      if (scripts.start) {
-        return { command: 'npm', args: ['run', 'start'], description: 'npm run start' };
-      }
-      if (scripts.serve) {
-        return { command: 'npm', args: ['run', 'serve'], description: 'npm run serve' };
-      }
-      if (scripts.preview) {
-        return { command: 'npm', args: ['run', 'preview'], description: 'npm run preview' };
+      for (const script of ['dev', 'start', 'serve', 'preview']) {
+        if (scripts[script]) {
+          const cmd = getRunCommand(pm, script);
+          return { ...cmd, description: formatRunCommand(pm, script) };
+        }
       }
     } catch {
       // Ignore parse errors
@@ -223,6 +219,7 @@ export interface RunCommandOptions {
   circuitBreakerFailures?: number;
   circuitBreakerErrors?: number;
   contextBudget?: number;
+  validationWarmup?: number;
   // Figma options
   figmaMode?: 'spec' | 'tokens' | 'components' | 'assets' | 'content';
   figmaFramework?: 'react' | 'vue' | 'svelte' | 'astro' | 'nextjs' | 'nuxt' | 'html';
@@ -573,6 +570,16 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN
     console.log(chalk.dim(`Max iterations: ${smartIterations} (${reason})`));
   }
 
+  // Auto-detect greenfield builds: skip validation until enough tasks are done
+  const isGreenfield = taskCount.total > 0 && taskCount.completed === 0;
+  const autoWarmup = isGreenfield ? Math.max(2, Math.floor(taskCount.total * 0.5)) : 0;
+  const validationWarmup = options.validationWarmup ? Number(options.validationWarmup) : autoWarmup;
+  if (validationWarmup > 0 && options.validate) {
+    console.log(
+      chalk.dim(`Validation warm-up: skipping until ${validationWarmup} tasks completed`)
+    );
+  }
+
   // Apply preset values with CLI overrides
   const loopOptions: LoopOptions = {
     task: preset?.promptPrefix ? `${preset.promptPrefix}\n\n${finalTask}` : finalTask,
@@ -587,6 +594,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN
     prIssueRef: sourceIssueRef,
     prLabels: options.auto ? ['AUTO'] : undefined,
     validate: options.validate ?? preset?.validate,
+    validationWarmup,
     sourceType: options.from?.toLowerCase(),
     // New options
     completionPromise: options.completionPromise ?? preset?.completionPromise,
diff --git a/src/loop/__tests__/validation.test.ts b/src/loop/__tests__/validation.test.ts
index f46a90c..66baa91 100644
--- a/src/loop/__tests__/validation.test.ts
+++ b/src/loop/__tests__/validation.test.ts
@@ -89,7 +89,7 @@ describe('validation', () => {
       expect(commands.find((c) => c.name === 'test')).toEqual({
         name: 'test',
         command: 'npm',
-        args: ['run', 'test'],
+        args: ['test'],
       });
     });
 
diff --git a/src/loop/executor.ts b/src/loop/executor.ts
index 6d35e63..102f69d 100644
--- a/src/loop/executor.ts
+++ b/src/loop/executor.ts
@@ -190,6 +190,7 @@ export interface LoopOptions {
   trackCost?: boolean; // Track token usage and cost
   model?: string; // Model name for cost estimation
   contextBudget?: number; // Max input tokens per iteration (0 = unlimited)
+  validationWarmup?: number; // Skip validation until N tasks completed (for greenfield builds)
 }
 
 export interface LoopResult {
@@ -569,6 +570,9 @@ export async function runLoop(options: LoopOptions): Promise<LoopResult> {
     // Check if tasks were completed since last iteration
     const newlyCompleted = completedTasks - previousCompletedTasks;
     if (newlyCompleted > 0 && i > 1) {
+      // Task completion is forward progress — reset circuit breaker consecutive failures
+      circuitBreaker.recordSuccess();
+
       // Get names of newly completed tasks (strip markdown)
       const maxNameWidth = Math.max(30, getTerminalWidth() - 30);
       const completedNames = taskInfo.tasks
@@ -798,10 +802,13 @@ export async function runLoop(options: LoopOptions): Promise<LoopResult> {
     }
 
     // Run validation (backpressure) if enabled and there are changes
+    // Skip validation during warm-up period (greenfield builds where early tasks can't pass tests)
     let _validationPassed = true;
     let validationResults: ValidationResult[] = [];
+    const warmupThreshold = options.validationWarmup ?? 0;
+    const pastWarmup = completedTasks >= warmupThreshold;
 
-    if (validationCommands.length > 0 && (await hasUncommittedChanges(options.cwd))) {
+    if (validationCommands.length > 0 && pastWarmup && (await hasUncommittedChanges(options.cwd))) {
       spinner.start(chalk.yellow(`Loop ${i}: Running validation...`));
 
       validationResults = await runAllValidations(options.cwd, validationCommands);
diff --git a/src/loop/validation.ts b/src/loop/validation.ts
index 0f3962c..afef26e 100644
--- a/src/loop/validation.ts
+++ b/src/loop/validation.ts
@@ -1,6 +1,7 @@
 import { existsSync, readFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { execa } from 'execa';
+import { detectPackageManager, getRunCommand } from '../utils/package-manager.js';
 
 export interface ValidationCommand {
   name: string;
@@ -68,18 +69,23 @@ export function detectValidationCommands(cwd: string): ValidationCommand[] {
       try {
         const pkg = JSON.parse(readFileSync(packagePath, 'utf-8'));
         const scripts = pkg.scripts || {};
+        const pm = detectPackageManager(cwd);
 
         if (scripts.test && scripts.test !== 'echo "Error: no test specified" && exit 1') {
-          commands.push({ name: 'test', command: 'npm', args: ['run', 'test'] });
+          const cmd = getRunCommand(pm, 'test');
+          commands.push({ name: 'test', ...cmd });
         }
         if (scripts.lint) {
-          commands.push({ name: 'lint', command: 'npm', args: ['run', 'lint'] });
+          const cmd = getRunCommand(pm, 'lint');
+          commands.push({ name: 'lint', ...cmd });
         }
         if (scripts.build) {
-          commands.push({ name: 'build', command: 'npm', args: ['run', 'build'] });
+          const cmd = getRunCommand(pm, 'build');
+          commands.push({ name: 'build', ...cmd });
         }
         if (scripts.typecheck) {
-          commands.push({ name: 'typecheck', command: 'npm', args: ['run', 'typecheck'] });
+          const cmd = getRunCommand(pm, 'typecheck');
+          commands.push({ name: 'typecheck', ...cmd });
         }
       } catch {
         // Invalid package.json
diff --git a/src/utils/__tests__/package-manager.test.ts b/src/utils/__tests__/package-manager.test.ts
new file mode 100644
index 0000000..0ceff38
--- /dev/null
+++ b/src/utils/__tests__/package-manager.test.ts
@@ -0,0 +1,94 @@
+import { existsSync, readFileSync } from 'node:fs';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { detectPackageManager, formatRunCommand, getRunCommand } from '../package-manager.js';
+
+vi.mock('node:fs', () => ({
+  existsSync: vi.fn(),
+  readFileSync: vi.fn(),
+}));
+
+const mockExistsSync = vi.mocked(existsSync);
+const mockReadFileSync = vi.mocked(readFileSync);
+
+describe('detectPackageManager', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should return pnpm when pnpm-lock.yaml exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('pnpm-lock.yaml'));
+    expect(detectPackageManager('/test')).toBe('pnpm');
+  });
+
+  it('should return yarn when yarn.lock exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('yarn.lock'));
+    expect(detectPackageManager('/test')).toBe('yarn');
+  });
+
+  it('should return bun when bun.lockb exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lockb'));
+    expect(detectPackageManager('/test')).toBe('bun');
+  });
+
+  it('should return bun when bun.lock exists', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('bun.lock'));
+    expect(detectPackageManager('/test')).toBe('bun');
+  });
+
+  it('should read packageManager field from package.json', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json'));
+    mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' }));
+    expect(detectPackageManager('/test')).toBe('pnpm');
+  });
+
+  it('should prefer lockfile over packageManager field', () => {
+    mockExistsSync.mockImplementation(
+      (p: any) => p.toString().includes('yarn.lock') || p.toString().includes('package.json')
+    );
+    mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'pnpm@9.0.0' }));
+    expect(detectPackageManager('/test')).toBe('yarn');
+  });
+
+  it('should default to npm when no indicators found', () => {
+    mockExistsSync.mockReturnValue(false);
+    expect(detectPackageManager('/test')).toBe('npm');
+  });
+
+  it('should default to npm for unrecognized packageManager', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json'));
+    mockReadFileSync.mockReturnValue(JSON.stringify({ packageManager: 'unknown@1.0.0' }));
+    expect(detectPackageManager('/test')).toBe('npm');
+  });
+
+  it('should handle invalid package.json gracefully', () => {
+    mockExistsSync.mockImplementation((p: any) => p.toString().includes('package.json'));
+    mockReadFileSync.mockReturnValue('not valid json');
+    expect(detectPackageManager('/test')).toBe('npm');
+  });
+});
+
+describe('getRunCommand', () => {
+  it('should return shorthand for test script', () => {
+    expect(getRunCommand('pnpm', 'test')).toEqual({ command: 'pnpm', args: ['test'] });
+    expect(getRunCommand('npm', 'test')).toEqual({ command: 'npm', args: ['test'] });
+    expect(getRunCommand('bun', 'test')).toEqual({ command: 'bun', args: ['test'] });
+  });
+
+  it('should use run for non-test scripts', () => {
+    expect(getRunCommand('pnpm', 'build')).toEqual({ command: 'pnpm', args: ['run', 'build'] });
+    expect(getRunCommand('npm', 'lint')).toEqual({ command: 'npm', args: ['run', 'lint'] });
+    expect(getRunCommand('bun', 'dev')).toEqual({ command: 'bun', args: ['run', 'dev'] });
+  });
+});
+
+describe('formatRunCommand', () => {
+  it('should format test commands', () => {
+    expect(formatRunCommand('pnpm', 'test')).toBe('pnpm test');
+    expect(formatRunCommand('npm', 'test')).toBe('npm test');
+  });
+
+  it('should format run commands', () => {
+    expect(formatRunCommand('pnpm', 'build')).toBe('pnpm run build');
+    expect(formatRunCommand('yarn', 'lint')).toBe('yarn run lint');
+  });
+});
diff --git a/src/utils/package-manager.ts b/src/utils/package-manager.ts
new file mode 100644
index 0000000..354b2bb
--- /dev/null
+++ b/src/utils/package-manager.ts
@@ -0,0 +1,61 @@
+import { existsSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+
+export type PackageManager = 'npm' | 'pnpm' | 'yarn' | 'bun';
+
+/**
+ * Detect the package manager used in a project directory.
+ *
+ * Detection priority:
+ * 1. Lock file presence (most reliable — reflects actual usage)
+ * 2. packageManager field in package.json (explicit declaration)
+ * 3. Default: npm
+ */
+export function detectPackageManager(cwd: string): PackageManager {
+  // Check lock files first (most reliable indicator of actual usage)
+  if (existsSync(join(cwd, 'pnpm-lock.yaml'))) return 'pnpm';
+  if (existsSync(join(cwd, 'yarn.lock'))) return 'yarn';
+  if (existsSync(join(cwd, 'bun.lockb')) || existsSync(join(cwd, 'bun.lock'))) return 'bun';
+
+  // Check package.json packageManager field
+  const packageJsonPath = join(cwd, 'package.json');
+  if (existsSync(packageJsonPath)) {
+    try {
+      const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
+      if (pkg.packageManager) {
+        const name = pkg.packageManager.split('@')[0];
+        if (['pnpm', 'yarn', 'bun'].includes(name)) {
+          return name as PackageManager;
+        }
+      }
+    } catch {
+      // Invalid package.json — fall through to default
+    }
+  }
+
+  return 'npm';
+}
+
+/**
+ * Get the run command for a package manager script.
+ * For 'test', uses the shorthand (e.g., `pnpm test`).
+ * For other scripts, uses `run` (e.g., `pnpm run build`).
+ */
+export function getRunCommand(
+  pm: PackageManager,
+  script: string
+): { command: string; args: string[] } {
+  if (script === 'test') {
+    return { command: pm, args: ['test'] };
+  }
+  return { command: pm, args: ['run', script] };
+}
+
+/**
+ * Format a run command as a display string.
+ * e.g., "pnpm run build" or "bun test"
+ */
+export function formatRunCommand(pm: PackageManager, script: string): string {
+  const { command, args } = getRunCommand(pm, script);
+  return `${command} ${args.join(' ')}`;
+}
diff --git a/src/wizard/spec-generator.ts b/src/wizard/spec-generator.ts
index 66283b4..bafa87e 100644
--- a/src/wizard/spec-generator.ts
+++ b/src/wizard/spec-generator.ts
@@ -1,3 +1,4 @@
+import { detectPackageManager, formatRunCommand } from '../utils/package-manager.js';
 import type { TechStack, WizardAnswers } from './types.js';
 import { formatComplexity, formatProjectType } from './ui.js';
 
@@ -160,9 +161,11 @@ export function generateAgentsMd(answers: WizardAnswers): string {
     answers.techStack.backend === 'nodejs';
 
   if (hasNodeStack) {
-    sections.push('- **lint**: `npm run lint`');
-    sections.push('- **build**: `npm run build`');
-    sections.push('- **test**: `npm test`');
+    // Detect PM from working directory if available, default to npm for greenfield projects
+    const pm = answers.workingDirectory ? detectPackageManager(answers.workingDirectory) : 'npm';
+    sections.push(`- **lint**: \`${formatRunCommand(pm, 'lint')}\``);
+    sections.push(`- **build**: \`${formatRunCommand(pm, 'build')}\``);
+    sections.push(`- **test**: \`${formatRunCommand(pm, 'test')}\``);
   } else if (answers.techStack.backend === 'python') {
     sections.push('- **lint**: `ruff check .`');
     sections.push('- **test**: `pytest`');