diff --git a/.egg-state/agent-outputs/871-integrator-output.json b/.egg-state/agent-outputs/871-integrator-output.json new file mode 100644 index 000000000..46a407c04 --- /dev/null +++ b/.egg-state/agent-outputs/871-integrator-output.json @@ -0,0 +1,54 @@ +{ + "status": "success", + "tests": { + "total": 6692, + "passed": 6692, + "failed": 0, + "skipped": 85 + }, + "lint": { + "status": "pass", + "issues_found": 3, + "issues_fixed": 3, + "details": "Removed unused pytest imports from test_handoffs_namespaced.py and test_orchestrator.py; fixed import sort order in test_handoffs_namespaced.py; reformatted 5 files via ruff format" + }, + "integration_issues": { + "found": 2, + "fixed": 2, + "details": [ + { + "file": "orchestrator/routes/pipelines.py", + "line": 2213, + "issue": "Hardcoded tester-output.json in _build_phase_prompt revision instructions not updated to use namespaced path", + "fix": "Updated to use f-string with {issue_number if issue_number is not None else pipeline_id}-tester-output.json" + }, + { + "file": "orchestrator/routes/pipelines.py", + "line": 2810, + "issue": "Hardcoded tester-output.json in _build_phase_scoped_prompt revision checklist not updated to use namespaced path", + "fix": "Updated to use f-string with {pipeline.issue_number if pipeline.issue_number is not None else pipeline_id}-tester-output.json" + } + ] + }, + "regressions": { + "found": false, + "details": "All 6692 existing tests pass. No regressions detected." + }, + "summary": "Integration verified successfully. The coder's implementation correctly namespaces .egg-state files per-pipeline to prevent merge conflicts. Two hardcoded tester-output.json references in prompt-building functions were missed by the coder and have been fixed by the integrator. All tests pass and lint is clean after fixes.", + "changes_reviewed": { + "core_library": "shared/egg_contracts/orchestrator.py - load_agent_output, save_agent_output, collect_handoff_data now accept identifier parameter with backward-compat fallback", + "orchestrator_handoffs": "orchestrator/handoffs.py - wrapper functions forward identifier to core library", + "pipeline_routes": "orchestrator/routes/pipelines.py - all callers pass identifier derived from issue_number or pipeline_id; prompt builders reference namespaced filenames", + "signals": "orchestrator/routes/signals.py - complete signal handler passes identifier when saving agent output", + "docs": "docs/architecture/orchestrator.md, docs/guides/agent-development.md, docs/guides/sdlc-pipeline.md - updated to reference namespaced filenames", + "agent_modes": "sandbox/.claude/commands/{coder,tester,documenter,integrator}-mode.md - updated output filename instructions", + "tests": "orchestrator/tests/test_handoffs_namespaced.py (new), shared/egg_contracts/tests/test_orchestrator.py (new), orchestrator/tests/test_pipeline_prompts.py (extended) - comprehensive coverage of namespaced paths, backward compat, edge cases" + }, + "commit": "fbf79998f", + "acceptance_criteria": { + "all_egg_state_files_namespaced": true, + "no_globally_named_files_remain": true, + "backward_compatible_reader": true, + "documentation_updated": true + } +} diff --git a/.egg-state/agent-outputs/architect-output.json b/.egg-state/agent-outputs/architect-output.json index eeb47bad3..0e255c3d7 100644 --- a/.egg-state/agent-outputs/architect-output.json +++ b/.egg-state/agent-outputs/architect-output.json @@ -1,544 +1,535 @@ { - "issue": 732, + "issue": 871, "phase": "plan", "agent": "architect", - "title": "Support parallel phase-level dispatch for implement phase", - "summary": "Architecture analysis for adding Tier 3 (high-complexity) dispatch to the SDLC pipeline. This tier runs independent plan phases as parallel implement cycles (coder -> tester -> agentic review), with dependent phases running sequentially. An integrator with write access merges sub-branches and fixes integration issues before human review. The recommended approach is a hybrid strategy: deliver sequential phase cycling first (foundation), then add parallel dispatch as an opt-in feature.", + "revision": 2, + "revision_reason": "Addressing plan review feedback: missing wrapper layer callsites (R-1), missing test file (R-7), TASK-2-5 scope, and risk section alignment with risk analyst output.", + "title": "Revised architecture analysis: Namespace .egg-state files per-pipeline", + + "summary": "Add issue/pipeline identifier prefix to all globally-named files in .egg-state/agent-outputs/ and .egg-state/checks/, matching the convention already used by contracts/, drafts/, and reviews/. This eliminates merge conflicts when concurrent pipelines merge to main (e.g., PR #869 had 4 such conflicts). Revision 2 adds the missing wrapper layer (orchestrator/handoffs.py, dispatch.py, signals.py) identified in plan review feedback and aligns the risk section with the risk analyst's findings.", "problem_statement": { - "description": "The SDLC pipeline supports two dispatch tiers: Tier 1 (short-circuit for low complexity) and Tier 2 (single coder with wave-based multi-agent for mid complexity). Large features with multiple independent phases are forced through a single coder processing all tasks sequentially, creating serial bottlenecks, late integration failures, unbounded reviewer scope, and no early-abort capability.", - "goals": [ - "Extend complexity assessment from 2 tiers (low/not-low) to 3 tiers (low/mid/high)", - "Run independent plan phases as parallel implement cycles with per-phase agentic review", - "Enable early abort when a phase's agentic review finds a design flaw", - "Bound reviewer scope to one phase's changes at a time", - "Give the integrator write access to merge sub-branches and fix integration issues", - "Track per-phase-per-role execution status in the contract", - "Isolate parallel phases via sub-branches to prevent merge conflicts" + "description": "The .egg-state/ directory has inconsistent file namespacing. Contracts, drafts, and reviews use per-issue/pipeline prefixed filenames, so concurrent pipelines never conflict on merge. However, agent-outputs/ handoff files and checks/ results use globally-named files. When multiple pipelines merge to main, these global files produce git merge conflicts (e.g., PR #869 had 4 such conflicts).", + "root_cause": "load_agent_output() and save_agent_output() in shared/egg_contracts/orchestrator.py construct paths as {role.value}-output.json with no identifier parameter. Prompt builders embed these same hardcoded filenames. The checks/ directory uses a similar hardcoded implement-results.json.", + "affected_files": [ + "architect-output.json", + "integrator-output.json", + "risk_analyst-output.json", + "coder-output.json", + "tester-output.json", + "documenter-output.json", + "task_planner-output.json", + "implement-results.json" ] }, - "current_architecture": { - "complexity_assessment": { - "location": "orchestrator/routes/pipelines.py:1549-1567", - "mechanism": "Refine agent assesses complexity as low/medium/high. Low signals short_circuit: true in YAML metadata. Medium and high both flow to the plan phase — no Tier 3 differentiation exists.", - "signal_detection": "_check_short_circuit_signal() reads the last YAML block in the analysis draft for short_circuit: true", - "config_gate": "PipelineConfig.allow_short_circuit (default: True)" - }, - "multi_agent_orchestration": { - "location": "orchestrator/multi_agent.py, shared/egg_contracts/orchestrator.py", - "mechanism": "MultiAgentExecutor runs agents in wave-based parallel execution. Waves are computed from role-level dependencies via DependencyGraph. Default implement wave order: CODER -> TESTER+DOCUMENTER -> INTEGRATOR.", - "key_limitation": "DependencyGraph nodes are AgentRole enum values, not (phase_id, role) composites. Only one execution slot exists per role." - }, - "execution_state": { - "location": "shared/egg_contracts/orchestration.py, shared/egg_contracts/models.py", - "mechanism": "OrchestrationState.executions is dict[AgentRole, AgentExecutionModel]. The contract's agent_executions is a flat list of AgentExecutionModel keyed by role.", - "key_limitation": "Running the same role twice (e.g., CODER for Phase 1 and CODER for Phase 2) would overwrite state. No phase_id dimension exists." - }, - "dependency_graph": { - "location": "shared/egg_contracts/dependency_graph.py", - "mechanism": "DependencyNode contains role, dependencies[], dependents[]. compute_waves() returns list[list[AgentRole]] for parallel execution. Topological sort ensures correct ordering.", - "key_limitation": "Graph operates on AgentRole, not (phase_id, role). Cannot represent 'CODER for phase-1 depends on nothing' vs 'CODER for phase-4 depends on phase-1 completion'." - }, - "plan_phase_dependencies": { - "location": "shared/egg_contracts/plan_parser.py:88-96, 427-428", - "mechanism": "ParsedPhase has a dependencies field that is populated from the YAML plan. However, to_contract_phase() at line 98-105 discards this field — the contract Phase model has no dependencies field.", - "gap": "Phase dependencies are parsed but not persisted in the contract schema." - }, - "branch_management": { - "location": "gateway/policy.py:129-147, gateway/worktree_manager.py", - "mechanism": "Branch ownership validated via egg- or egg/ prefix using startswith(). Worktrees created per pipeline with branch pattern egg/{container_id}/work.", - "sub_branch_support": "The prefix check (startswith('egg/')) already supports nested paths like egg/feature/phase-1. No gateway policy changes are needed for sub-branch naming — only worktree lifecycle management needs extension." - }, - "integrator_role": { - "location": "shared/egg_contracts/agent_roles.py:289-318", - "mechanism": "INTEGRATOR_ROLE has blocked_write for src/, lib/, docs/, tests/, test/. Can only write to .egg-state/agent-outputs/.", - "key_limitation": "Read-only for production code. Tier 3 requires write access for merging sub-branches and fixing integration issues." - }, - "phase_transitions": { - "location": "orchestrator/routes/phases.py:50-63", - "mechanism": "REFINE -> [PLAN, IMPLEMENT], PLAN -> [IMPLEMENT], IMPLEMENT -> [PR]. No INTEGRATE phase exists.", - "consideration": "Issue suggests either a new INTEGRATE phase or internal cycling within IMPLEMENT." - } + "call_chain_analysis": { + "description": "Complete call chain for agent output I/O, including the wrapper layer identified in review feedback. This section was ADDED in revision 2 to address the plan reviewer's critical finding about missing callsites.", + "layers": [ + { + "layer": "Shared library (core)", + "file": "shared/egg_contracts/orchestrator.py", + "functions": [ + {"name": "load_agent_output(repo_path, role)", "line": 323, "constructs_path": true}, + {"name": "save_agent_output(repo_path, role, outputs)", "line": 346, "constructs_path": true}, + {"name": "collect_handoff_data(repo_path, target_role)", "line": 372, "calls": "load_agent_output"} + ] + }, + { + "layer": "Orchestrator wrapper (handoffs.py) — MISSED IN ORIGINAL PLAN", + "file": "orchestrator/handoffs.py", + "functions": [ + {"name": "save_agent_output(repo_path, output: AgentOutput)", "line": 152, "calls": "contract_save_output (shared save_agent_output)", "note": "Wraps shared function with ORM AgentOutput type. Does NOT currently accept or forward identifier."}, + {"name": "load_agent_output_data(repo_path, role: AgentRole)", "line": 171, "calls": "load_agent_output (shared)", "note": "Wraps shared function. Returns AgentOutput or None. Does NOT currently accept or forward identifier."}, + {"name": "collect_handoff_data(repo_path, target_role: AgentRole)", "line": 193, "calls": "load_agent_output_data (internal)", "note": "Wraps via internal load. Does NOT currently accept or forward identifier."}, + {"name": "get_handoff_env_var(repo_path, target_role: AgentRole)", "line": 227, "calls": "collect_handoff_data (internal)", "note": "JSON-serializes handoff data. Does NOT currently accept or forward identifier."} + ] + }, + { + "layer": "Dispatch (dispatch.py) — MISSED IN ORIGINAL PLAN", + "file": "orchestrator/dispatch.py", + "callers": [ + {"function": "PipelineDispatcher.complete_agent()", "line": 219, "calls": "save_agent_output (from shared directly, line 34)", "identifier_available_via": "self.contract_key (pipeline.issue_number or pipeline.id)"}, + {"function": "PipelineDispatcher.get_handoff_data()", "line": 264, "calls": "collect_handoff_data (from shared directly, line 31)", "identifier_available_via": "self.contract_key"} + ], + "note": "dispatch.py imports save_agent_output and collect_handoff_data directly from egg_contracts.orchestrator (lines 29-35), NOT through the handoffs.py wrapper." + }, + { + "layer": "Signals (routes/signals.py) — MISSED IN ORIGINAL PLAN", + "file": "orchestrator/routes/signals.py", + "callers": [ + {"function": "handle_complete_signal()", "line": 193, "calls": "save_agent_output (from handoffs wrapper, line 36)", "identifier_available_via": "pipeline.issue_number from store.load_pipeline(pipeline_id) at line 162, or pipeline_id URL param"} + ] + }, + { + "layer": "Pipelines (routes/pipelines.py) — covered in original plan", + "file": "orchestrator/routes/pipelines.py", + "callers": [ + {"function": "_synthesize_plan_draft_from_outputs()", "line": "~4335", "reads": "agent output files directly via Path", "note": "Constructs filenames inline, not via shared function"}, + {"function": "_read_tester_gaps()", "line": "~1619", "reads": "tester-output.json directly via Path", "note": "Hardcoded filename"}, + {"function": "Various prompt builders", "note": "Hardcoded filenames in prompt strings for agents to write to"} + ] + } + ] }, - "key_files": [ - { - "path": "shared/egg_contracts/dependency_graph.py", - "role": "DAG computation for agent execution waves", - "changes_needed": "Add PhaseDependencyGraph class that operates on phase IDs instead of AgentRole. Compute phase waves from Phase.dependencies for determining implement cycle ordering.", - "risk": "High — foundation of parallel execution ordering" - }, - { - "path": "shared/egg_contracts/agent_roles.py", - "role": "Role definitions including dependencies and file access patterns", - "changes_needed": "Modify INTEGRATOR_ROLE to support configurable write access for Tier 3. Add phase_id tracking to AgentExecution.", - "risk": "Medium — must maintain backward compatibility for Tier 2" - }, - { - "path": "shared/egg_contracts/orchestration.py", - "role": "Orchestration state management, dependency checking", - "changes_needed": "Extend OrchestrationState.executions to support (phase_id, role) composite keys. Update can_agent_run() and get_runnable_agents() for phase-scoped dependency checking.", - "risk": "High — central state management module" - }, - { - "path": "shared/egg_contracts/orchestrator.py", - "role": "Dispatch logic and agent lifecycle management", - "changes_needed": "Add phase-aware dispatch decisions. Support per-phase implement cycling (coder -> tester -> agentic review -> next phase or retry).", - "risk": "High — dispatch is the core orchestration logic" - }, - { - "path": "shared/egg_contracts/models.py", - "role": "Pydantic contract models including Phase, AgentExecutionModel, Contract", - "changes_needed": "Add dependencies field to Phase model. Add phase_id field to AgentExecutionModel. Both optional for backward compat.", - "risk": "High — schema change requires backward compatibility" - }, - { - "path": "shared/egg_contracts/plan_parser.py", - "role": "Plan document parsing, extracts phases/tasks from YAML", - "changes_needed": "Preserve ParsedPhase.dependencies when converting to contract Phase via to_contract_phase().", - "risk": "Low — straightforward field propagation" - }, - { - "path": "orchestrator/multi_agent.py", - "role": "MultiAgentExecutor — wave spawning and parallel execution", - "changes_needed": "Support phase-level orchestration: run N implement cycles. Each cycle spawns coder -> tester -> agentic reviewers for a single phase's tasks.", - "risk": "High — significant behavioral change to execution flow" - }, - { - "path": "orchestrator/dispatch.py", - "role": "PipelineDispatcher — wraps contract orchestrator for pipeline integration", - "changes_needed": "Add per-phase dispatching, phase-scoped handoff data, and phase cycling logic.", - "risk": "Medium — wrapper layer, follows orchestrator.py changes" - }, - { - "path": "orchestrator/routes/pipelines.py", - "role": "Pipeline routes: phase transitions, agent prompts, complexity detection", - "changes_needed": "Extend complexity assessment to 3 tiers. Add _run_tier3_implement() for phase cycling. Update implement phase prompt for phase-scoped context.", - "risk": "High — largest file, most complex changes" - }, - { - "path": "orchestrator/models.py", - "role": "Pipeline and PipelineConfig models (orchestrator-side)", - "changes_needed": "Add complexity_tier field to Pipeline. Extend PipelineConfig for Tier 3 settings.", - "risk": "Low — additive model changes" - }, - { - "path": "gateway/worktree_manager.py", - "role": "Worktree lifecycle management", - "changes_needed": "Support per-phase worktrees for parallel execution (Stage 2 only).", - "risk": "Medium — filesystem lifecycle complexity" - }, - { - "path": ".egg/schemas/contract.schema.json", - "role": "JSON schema for contract validation", - "changes_needed": "Add dependencies to Phase, phase_id to AgentExecutionModel.", - "risk": "Medium — schema migration affects all contracts" + "current_architecture": { + "correctly_namespaced": { + "contracts": { + "pattern": "{identifier}.json", + "code": "shared/egg_contracts/loader.py:44 — get_contract_path(identifier)", + "example": "850.json" + }, + "drafts": { + "pattern": "{identifier}-{phase}.md", + "code": "orchestrator/routes/pipelines.py:1060 — _get_draft_path()", + "example": "850-analysis.md" + }, + "reviews": { + "pattern": "{identifier}-{phase}-{type}-review.json", + "code": "orchestrator/routes/pipelines.py:1035 — _verdict_path_for_type()", + "example": "850-implement-code-review.json" + } + }, + "globally_named_problem_files": { + "agent_outputs_writers": { + "programmatic": { + "function": "save_agent_output(repo_path, role, outputs)", + "location": "shared/egg_contracts/orchestrator.py:346-369", + "path_construction": "repo_path / '.egg-state' / 'agent-outputs' / f'{role.value}-output.json'", + "issue": "No identifier parameter — all pipelines write to the same filename per role" + }, + "prompt_embedded": [ + {"role": "architect", "location": "orchestrator/routes/pipelines.py:2524", "path": ".egg-state/agent-outputs/architect-output.json"}, + {"role": "integrator", "location": "orchestrator/routes/pipelines.py:2509", "path": ".egg-state/agent-outputs/integrator-output.json"}, + {"role": "risk_analyst", "location": "orchestrator/routes/pipelines.py:2590", "path": ".egg-state/agent-outputs/risk_analyst-output.json"} + ] + }, + "agent_outputs_readers": { + "load_agent_output": { + "location": "shared/egg_contracts/orchestrator.py:323-343", + "path_construction": "repo_path / '.egg-state' / 'agent-outputs' / f'{role.value}-output.json'", + "callers": ["collect_handoff_data (line 392)", "orchestrator/handoffs.py:185 (load_agent_output_data)"] + }, + "_synthesize_plan_draft": { + "location": "orchestrator/routes/pipelines.py:4335-4350", + "reads": ["architect-output.json", "risk_analyst-output.json"], + "note": "Already receives pipeline_mode and issue_number parameters" + }, + "_read_tester_gaps": { + "location": "orchestrator/routes/pipelines.py:1619-1644", + "reads": ["tester-output.json"], + "callers": ["line 3139", "line 5173"] + } + }, + "checks_writers": { + "prompt_embedded": [ + {"function": "_build_checker_prompt", "location": "orchestrator/routes/pipelines.py:4063"}, + {"function": "_build_check_and_fix_prompt", "location": "orchestrator/routes/pipelines.py:4275"}, + {"function": "_build_autofix_prompt", "location": "orchestrator/routes/pipelines.py:4086-4170"} + ] + } + }, + "gateway_uses_directory_wildcards": { + "agent_restrictions": "'.egg-state/agent-outputs/' in allowed_write patterns — directory-level, not file-level", + "phase_filter": "'.egg-state/agent-outputs/*' and '.egg-state/checks/*' allowed patterns — wildcards match any filename", + "readonly_mounts": "_IMPLEMENT_READONLY_DIRS = ('drafts', 'contracts', 'pipelines', 'reviews') — agent-outputs/ and checks/ NOT included", + "conclusion": "No gateway changes needed — file renames within directories are transparent to all enforcement layers" } - ], + }, "approaches": [ { "id": "A", - "name": "Full Tier 3 in one pass (parallel + sub-branches)", - "description": "Implement complete Tier 3 dispatch: phase-level dependency graph, parallel implement cycles on sub-branches, integrator with write access merging sub-branches, per-phase agentic review with retry.", + "name": "Prefix filenames with issue/pipeline identifier (RECOMMENDED)", + "description": "Rename all globally-named files to use {identifier}-{role}-output.json and {identifier}-implement-results.json. Update load_agent_output/save_agent_output to accept identifier parameter. Thread identifier through wrapper layer (handoffs.py) and all callers (dispatch.py, signals.py, pipelines.py). Update all prompt builders. Add backward-compatible fallback reads.", "pros": [ - "True parallelism with branch-level isolation prevents merge conflicts", - "Bounded agentic review scope per phase", - "Early abort saves tokens when a phase review finds flaws", - "Delivers the full vision from the issue in one delivery" + "Consistent with existing convention in contracts/, drafts/, reviews/", + "Minimal conceptual overhead — developers already understand {identifier}- prefix", + "Identifier (issue_number or pipeline_id) already available at all callsites", + "No gateway, readonly mount, or phase permission changes needed", + "Backward-compat fallback is simple: try new path, fall back to old" ], "cons": [ - "Largest implementation scope — 12+ files across 4 packages", - "Per-phase worktree management adds lifecycle complexity", - "Composite key change is pervasive across orchestration stack", - "Testing parallel phase execution requires complex integration tests", - "High risk of regressions to Tier 1 and Tier 2" + "Requires updating both programmatic code and prompt strings (two code paths)", + "Wrapper layer adds 3 more files to the change set (handoffs.py, dispatch.py, signals.py)" ], - "estimated_complexity": "High", - "files_affected": 12 + "risk": "Low" }, { "id": "B", - "name": "Sequential phase cycling only (no parallelism)", - "description": "Run implement cycles sequentially — one per plan phase — on the same branch. Each cycle spawns coder -> tester -> agentic review. No sub-branches, no gateway changes.", + "name": "Namespace by subdirectory instead of filename prefix", + "description": "Move files into per-issue subdirectories: .egg-state/agent-outputs/{identifier}/architect-output.json", "pros": [ - "No gateway or worktree changes needed", - "No merge conflicts (sequential execution on single branch)", - "Per-phase agentic review, early abort, and prompt isolation all work", - "Simpler integrator: validates rather than merges branches" + "Cleaner directory structure — each pipeline's files grouped together", + "Easier to list/cleanup files for a specific pipeline" ], "cons": [ - "No parallelism — no wall-clock time savings for independent phases", - "Still requires composite execution tracking (phase_id, role)", - "Does not meet the issue's full vision for Tier 3" + "Inconsistent with existing convention (contracts/, drafts/, reviews/ use filename prefixes, not subdirectories)", + "Requires gateway pattern changes (.egg-state/agent-outputs/* -> .egg-state/agent-outputs/**/*)", + "More complex backward-compat (checking both directory structures)", + "git doesn't track empty directories" ], - "estimated_complexity": "Medium", - "files_affected": 9 + "risk": "Medium" }, { "id": "C", - "name": "Hybrid — sequential foundation with optional parallelism (recommended)", - "description": "Stage 1: sequential phase cycling with composite execution tracking, per-phase agentic review, 3-tier complexity assessment, integrator write access. Stage 2: sub-branch isolation, per-phase worktrees, parallel dispatch behind a feature flag.", + "name": "Centralized path builder utility", + "description": "Create a single path-building module that all code uses to resolve .egg-state/ paths.", "pros": [ - "Incremental delivery — each stage is independently valuable and testable", - "Stage 1 provides 80% of value (per-phase review, early abort, bounded scope)", - "Stage 2 adds parallelism without rearchitecting the foundation", - "Feature flag allows gradual rollout and rollback", - "Composite execution tracking built once, used by both modes" + "Single source of truth for all path construction", + "Future-proof for naming convention changes" ], "cons": [ - "Two delivery stages means more review cycles", - "Sequential-first may seem incomplete vs the issue's parallel vision", - "Both stages still require the composite key change" + "Over-engineering for 8 filename changes", + "Still needs prompt string updates (agents respond to embedded paths)", + "_get_draft_path() and _verdict_path_for_type() already serve this role for their directories" ], - "estimated_complexity": "Medium-High (Stage 1: Medium, Stage 2: Medium)", - "files_affected": 12 + "risk": "Low (but unnecessary scope)" } ], "recommended_approach": { - "id": "C", - "name": "Hybrid — sequential foundation with optional parallelism", - "justification": [ - "The refine analysis recommended Option C and both refine reviewers approved. Sequential phase cycling provides most of the value: per-phase agentic review, early abort, bounded reviewer scope, and prompt isolation. Parallelism primarily saves wall-clock time.", - "The composite execution tracking change (phase_id, role) is the riskiest part. Delivering it with sequential cycling first ensures stability before adding parallel dispatch and sub-branch management.", - "Gateway branch prefix checks already support nested paths (egg/feature/phase-1 passes startswith('egg/') check), so Stage 2 gateway changes are smaller than initially expected — the main work is worktree lifecycle management.", - "All 9 acceptance criteria from the issue are satisfiable: 3-tier complexity, per-phase implement cycles, coder -> tester -> agentic review with retry, integrator with write access, (phase_id, role) tracking, and sub-branch isolation (Stage 2)." - ] - }, - - "implementation_plan": { - "stage_1": { - "name": "Sequential phase cycling with composite execution tracking", - "description": "Build the orchestration foundation for Tier 3: 3-tier complexity assessment, sequential per-phase implement cycles, composite (phase_id, role) execution tracking, per-phase agentic review with retry, and integrator write access.", - "phases": [ - { - "id": 1, - "name": "Contract schema and model extensions", - "description": "Extend the data model with phase dependencies, composite execution keys, and complexity tier fields. This is the foundation all other changes build on.", - "key_changes": [ - "Add dependencies: list[str] field to Phase model (default: [] for backward compat)", - "Add phase_id: str | None field to AgentExecutionModel (None for Tier 2 compat)", - "Add complexity_tier: str field to Pipeline model with values low/mid/high", - "Update contract.schema.json with new fields", - "Update ParsedPhase.to_contract_phase() in plan_parser.py to propagate dependencies", - "Add dependencies field to YAML plan template" - ], - "files": [ - "shared/egg_contracts/models.py", - "shared/egg_contracts/plan_parser.py", - ".egg/schemas/contract.schema.json", - "orchestrator/models.py", - "docs/templates/plan.md" - ], - "dependencies": [] - }, - { - "id": 2, - "name": "3-tier complexity assessment", - "description": "Extend refine phase complexity detection from binary to 3 tiers. Add Tier 3 signal detection.", - "key_changes": [ - "Update refine prompt to signal complexity_tier: high alongside parallel_phases: true", - "Add _check_high_complexity_signal() in pipelines.py", - "Set pipeline.complexity_tier from detected signal (default: mid)", - "Tier 1 (low): short-circuit as before. Tier 2 (mid): standard waves. Tier 3 (high): phase cycling." - ], - "files": [ - "orchestrator/routes/pipelines.py", - "orchestrator/models.py" - ], - "dependencies": ["phase-1"] - }, - { - "id": 3, - "name": "Composite execution tracking", - "description": "Extend orchestration state to support (phase_id, role) composite keys.", - "key_changes": [ - "Introduce PhaseExecutionKey type or extend OrchestrationState for (phase_id, role) keying", - "Update OrchestrationState.from_contract() to deserialize phase_id from AgentExecutionModel", - "Update can_agent_run() for phase-scoped dependency checking", - "Update get_runnable_agents() for phase-aware filtering", - "Backward compat: when phase_id is None, behavior matches current Tier 2" - ], - "files": [ - "shared/egg_contracts/orchestration.py", - "shared/egg_contracts/orchestrator.py" - ], - "dependencies": ["phase-1"] - }, - { - "id": 4, - "name": "Phase-level dependency graph", - "description": "Build a phase-level DAG that determines execution order for plan phases.", - "key_changes": [ - "Add PhaseDependencyGraph class operating on phase IDs from Phase.dependencies", - "Compute phase waves: independent phases in same wave, dependent phases in later waves", - "Integrate with orchestrator to determine phase ordering for sequential cycling", - "For Stage 2, same graph determines parallel grouping" - ], - "files": [ - "shared/egg_contracts/dependency_graph.py", - "shared/egg_contracts/orchestration.py" - ], - "dependencies": ["phase-1", "phase-3"] - }, - { - "id": 5, - "name": "Sequential phase cycling in implement phase", - "description": "Modify implement phase to run N cycles (one per plan phase) sequentially. Each cycle: coder -> tester -> agentic review, with retry on rejection.", - "key_changes": [ - "Add _run_tier3_implement() in pipelines.py that loops through phases in dependency order", - "Each iteration calls _run_multi_agent_phase() with phase-scoped task context", - "Per-phase agentic review dispatches REVIEWER_CODE + REVIEWER_CONTRACT after each cycle", - "Retry logic: reviewer rejection triggers coder retry within that phase (no human gate)", - "Phase-scoped prompts: each coder sees only its phase's tasks and files", - "Update _build_agent_prompt() to accept phase_id for task filtering" - ], - "files": [ - "orchestrator/routes/pipelines.py", - "orchestrator/multi_agent.py", - "orchestrator/dispatch.py" - ], - "dependencies": ["phase-2", "phase-3", "phase-4"] - }, - { - "id": 6, - "name": "Integrator write access for Tier 3", - "description": "Give the integrator conditional write access to source, tests, and docs in Tier 3 mode.", - "key_changes": [ - "Create dynamic integrator file access based on complexity_tier", - "In Tier 3: integrator can write src/, tests/, docs/ to fix integration issues", - "In Tier 2: integrator remains read-only (backward compat)", - "Gateway phase_filter updated to allow integrator writes in Tier 3", - "Integrator prompt updated: run full test suite, fix integration issues, report" - ], - "files": [ - "shared/egg_contracts/agent_roles.py", - "gateway/phase_filter.py", - "gateway/agent_restrictions.py" - ], - "dependencies": ["phase-1"] - }, - { - "id": 7, - "name": "Tests for Stage 1", - "description": "Comprehensive test coverage for all Stage 1 changes.", - "key_changes": [ - "Unit tests for 3-tier complexity detection and signal parsing", - "Unit tests for composite execution tracking (phase_id, role)", - "Unit tests for phase dependency graph computation", - "Integration tests for sequential phase cycling flow", - "Backward compat tests: Tier 1 and Tier 2 unchanged", - "Plan parser tests: dependencies field preserved" - ], - "files": [ - "shared/egg_contracts/tests/", - "orchestrator/tests/" - ], - "dependencies": ["phase-5", "phase-6"] - } - ] - }, - "stage_2": { - "name": "Parallel dispatch with sub-branch isolation", - "description": "Add parallel execution of independent phases on sub-branches, gated behind a feature flag. Integrator merges sub-branches before human review.", - "phases": [ - { - "id": 8, - "name": "Per-phase worktree management", - "description": "Extend gateway WorktreeManager for per-phase worktrees.", - "key_changes": [ - "Add create_phase_worktree() for sub-worktrees from pipeline worktree", - "Branch naming: egg//phase-N (passes existing prefix check)", - "Worktree path: .egg-worktrees/{pipeline_id}/phase-{N}/{repo_name}", - "Lifecycle: cleanup phase worktrees after integrator merges" - ], - "files": ["gateway/worktree_manager.py"], - "dependencies": ["phase-7"] - }, - { - "id": 9, - "name": "Parallel phase dispatch", - "description": "Concurrent execution of independent plan phases on sub-branches.", - "key_changes": [ - "Use PhaseDependencyGraph to identify independent phase groups", - "Spawn parallel implement cycles for phases in same wave", - "Each coder pushes to egg//phase-N", - "Gated behind PipelineConfig.enable_parallel_phases (default: False)" - ], - "files": ["orchestrator/routes/pipelines.py", "orchestrator/multi_agent.py"], - "dependencies": ["phase-8"] - }, - { - "id": 10, - "name": "Integrator sub-branch merging", - "description": "Integrator merges phase sub-branches and resolves conflicts.", - "key_changes": [ - "Integrator receives list of sub-branches to merge", - "Sequential merge of phase branches into main feature branch", - "Conflict resolution as part of integrator's fixup diff", - "Full test suite after merge, report integration issues" - ], - "files": ["orchestrator/routes/pipelines.py", "shared/egg_contracts/agent_roles.py"], - "dependencies": ["phase-9"] - }, - { - "id": 11, - "name": "Tests for Stage 2", - "description": "Test coverage for parallel dispatch and sub-branch merging.", - "key_changes": [ - "Integration tests for parallel phase execution", - "Tests for sub-branch creation, merge, cleanup", - "Merge conflict scenario tests", - "End-to-end: 3 independent + 1 dependent phase pipeline" - ], - "files": ["orchestrator/tests/", "gateway/tests/"], - "dependencies": ["phase-10"] - } - ] - } + "id": "A", + "name": "Prefix filenames with issue/pipeline identifier", + "justification": "Directly matches the established convention used by all other .egg-state/ directories. The scope is well-bounded and the identifier is already available at all callsites (verified in revision 2 for the wrapper layer too: dispatch.py has self.contract_key, signals.py has pipeline.issue_number). No infrastructure changes needed." }, - "technical_decisions": [ + "architecture_decisions": [ { - "id": "TD-1", - "decision": "Use composite key (phase_id, AgentRole) for execution tracking", - "rationale": "The contract's agent_executions list must support multiple CODER instances (one per phase). A composite key is the minimal change. phase_id is optional (None for Tier 2) for backward compatibility.", - "alternatives_considered": [ - "Nested structure (phases -> executions) — rejected: too disruptive to contract consumers", - "Separate execution store per phase — rejected: fragments state management" - ] + "id": "AD-1", + "decision": "Namespace ALL agent output files, not just the 3 listed in the issue", + "rationale": "load_agent_output()/save_agent_output() generate paths for ALL agent roles using the same global pattern. Fixing only 3 of 7 leaves coder-output.json, tester-output.json, documenter-output.json, and task_planner-output.json vulnerable to the same merge conflict. Since we change the shared functions, all roles benefit at zero additional cost." }, { - "id": "TD-2", - "decision": "Store phase dependencies in the Phase model as dependencies: list[str]", - "rationale": "The Phase model is the natural home. PhaseDependencyGraph is computed at runtime from this data. The plan parser already parses dependencies — just need to propagate them.", - "alternatives_considered": [ - "Separate phase_graph field — rejected: duplicates information in phases", - "Dynamic computation from files_affected — rejected: imprecise and unreliable" - ] + "id": "AD-2", + "decision": "Add identifier parameter to load_agent_output/save_agent_output rather than creating a new path builder module", + "rationale": "These functions are the single point of path construction for agent outputs. Adding identifier: int | str | None follows the same pattern as get_contract_path(identifier). A new centralized path builder (Option C) would be over-engineering for this fix." }, { - "id": "TD-3", - "decision": "Tier 3 signaled by refine agent with same HITL override model as Tier 1", - "rationale": "The refine agent already assesses complexity. Adding a third tier follows the same pattern. Human can override during HITL review. Requiring explicit approval for every Tier 3 task adds friction without proportional benefit.", - "alternatives_considered": [ - "Always require human approval for Tier 3 — rejected: unnecessary friction", - "Auto-select from plan phase count — rejected: phase count alone doesn't indicate complexity" - ] + "id": "AD-3", + "decision": "Keep backward-compatible fallback permanently rather than planning removal", + "rationale": "Cost is one stat() syscall per read (negligible). Benefits: handles in-flight pipelines, protects against agent misbehavior (risk R-6), eliminates need for planned removal. The risk analyst recommends this approach.", + "updated_in": "revision 2" }, { - "id": "TD-4", - "decision": "Integrator gets conditional write access in Tier 3 only", - "rationale": "Privilege escalation scoped to Tier 3 limits blast radius. In Tier 2, integrator remains read-only. Integrator runs after all phase-level agentic reviews and before human review, providing defense in depth.", - "alternatives_considered": [ - "Unconditional write access — rejected: breaks least privilege for Tier 2", - "New INTEGRATOR_TIER3 role — rejected: role proliferation" - ] + "id": "AD-4", + "decision": "Thread identifier through wrapper layer (handoffs.py) and all callers (dispatch.py, signals.py)", + "rationale": "The original plan only covered the shared library and pipelines.py. The review identified that orchestrator/handoffs.py wraps all three shared functions, dispatch.py calls them directly, and signals.py calls them through the wrapper. Without threading identifier through all layers, the shared functions receive identifier=None and silently fall back to global paths.", + "added_in": "revision 2" }, { - "id": "TD-5", - "decision": "Sequential first, parallel second (hybrid delivery)", - "rationale": "Sequential cycling validates the orchestration foundation (composite keys, phase DAG, per-phase review) without concurrent execution complexity. Foundation is the risky part; parallelism is optimization.", - "alternatives_considered": [ - "Full parallel from start — rejected: too much risk in one delivery", - "Only sequential — rejected: doesn't meet the issue's Tier 3 vision" - ] - }, - { - "id": "TD-6", - "decision": "No new PipelinePhase.INTEGRATE — implement phase manages cycling internally", - "rationale": "A new pipeline phase would require updating transitions, prompts, HITL gates, and phase_filter restrictions across the system. The implement phase can manage cycle-then-integrate internally. The integrator already runs as the final wave.", - "alternatives_considered": [ - "New INTEGRATE phase — rejected: too many cross-cutting changes for modest benefit" - ] - }, - { - "id": "TD-7", - "decision": "Gateway prefix check already supports sub-branches — no policy change needed", - "rationale": "gateway/policy.py _is_bot_branch() uses startswith(('egg-', 'egg/')) which matches egg/feature/phase-1. Confirmed by code analysis. Only worktree lifecycle management needs extension in Stage 2.", - "alternatives_considered": [ - "Add explicit sub-branch pattern — rejected: unnecessary given existing prefix logic" - ] + "id": "AD-5", + "decision": "No gateway, readonly mount, or phase permission changes needed", + "rationale": "Gateway uses directory-level wildcards (.egg-state/agent-outputs/*, .egg-state/checks/*). Readonly mounts only cover drafts/, contracts/, pipelines/, reviews/. Neither agent-outputs/ nor checks/ are affected. File renames within these directories are transparent to all enforcement layers." } ], + "implementation_plan": { + "summary": "5 phases: (1) shared library → (1.5) wrapper layer & callers → (2) pipelines.py prompts & readers → (3) tests → (4) docs. Phase 1.5 was ADDED in revision 2.", + "phases": [ + { + "id": "phase-1", + "name": "Core path construction (shared library)", + "goal": "Update shared utility functions to support identifier-prefixed agent output paths", + "tasks": [ + { + "id": "TASK-1-1", + "description": "Add optional identifier parameter (int | str | None) to load_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, construct path as {identifier}-{role.value}-output.json. Add fallback: if prefixed path does not exist, try old {role.value}-output.json path. When identifier is None, use old path directly (backward compat).", + "acceptance": "load_agent_output(repo, role, identifier=871) reads from 871-{role}-output.json, falling back to {role}-output.json. load_agent_output(repo, role) reads from {role}-output.json (unchanged).", + "files": ["shared/egg_contracts/orchestrator.py"] + }, + { + "id": "TASK-1-2", + "description": "Add optional identifier parameter to save_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, write to {identifier}-{role.value}-output.json. When None, write to {role.value}-output.json (backward compat).", + "acceptance": "save_agent_output(repo, role, data, identifier=871) writes to 871-{role}-output.json. save_agent_output(repo, role, data) writes to {role}-output.json.", + "files": ["shared/egg_contracts/orchestrator.py"] + }, + { + "id": "TASK-1-3", + "description": "Add optional identifier parameter to collect_handoff_data() in shared/egg_contracts/orchestrator.py and pass it through to load_agent_output() for each dependency.", + "acceptance": "collect_handoff_data(repo, role, identifier=871) reads dependency outputs from prefixed paths with fallback.", + "files": ["shared/egg_contracts/orchestrator.py"] + } + ], + "dependencies": [] + }, + { + "id": "phase-1.5", + "name": "Wrapper layer and caller updates (ADDED IN REVISION 2)", + "goal": "Thread identifier parameter through orchestrator/handoffs.py wrappers and all callers in dispatch.py and signals.py", + "rationale": "Addresses plan review feedback: without this phase, shared functions receive identifier=None from these code paths and fall back to global filenames, silently defeating the entire fix. Risk analyst flagged this as R-1 (HIGH/HIGH).", + "tasks": [ + { + "id": "TASK-1.5-1", + "description": "Add optional identifier parameter (int | str | None) to all 4 wrapper functions in orchestrator/handoffs.py: save_agent_output(), load_agent_output_data(), collect_handoff_data(), get_handoff_env_var(). Forward identifier to the underlying shared functions. save_agent_output wraps contract_save_output (shared) at line 168. load_agent_output_data calls load_agent_output (shared) at line 185. collect_handoff_data calls load_agent_output_data at line 215. get_handoff_env_var calls collect_handoff_data at line 240.", + "acceptance": "All 4 wrapper functions accept and forward identifier. handoffs.save_agent_output(repo, output, identifier=871) writes to 871-{role}-output.json.", + "files": ["orchestrator/handoffs.py"] + }, + { + "id": "TASK-1.5-2", + "description": "Update PipelineDispatcher.complete_agent() in orchestrator/dispatch.py (line 219) to pass self.contract_key as identifier to save_agent_output(). Update PipelineDispatcher.get_handoff_data() (line 264) to pass self.contract_key as identifier to collect_handoff_data(). Note: dispatch.py imports these functions directly from shared (lines 29-35), not through handoffs.py.", + "acceptance": "Both callers pass self.contract_key (which returns pipeline.issue_number or pipeline.id) as identifier. No calls to save_agent_output or collect_handoff_data without identifier in dispatch.py.", + "files": ["orchestrator/dispatch.py"] + }, + { + "id": "TASK-1.5-3", + "description": "Update handle_complete_signal() in orchestrator/routes/signals.py (line 193) to pass identifier to handoffs.save_agent_output(). Derive identifier from pipeline object loaded at line 162: use pipeline.issue_number if not None, else pipeline_id (the URL param). Note: signals.py imports save_agent_output from handoffs (line 36), not from shared.", + "acceptance": "save_agent_output call at line 193 passes identifier. Agent output from signal handlers is written to prefixed path.", + "files": ["orchestrator/routes/signals.py"] + } + ], + "dependencies": ["phase-1"] + }, + { + "id": "phase-2", + "name": "Prompt builder and reader updates (pipelines.py)", + "goal": "Update all prompt builders and reader functions in orchestrator/routes/pipelines.py to use identifier-prefixed paths", + "tasks": [ + { + "id": "TASK-2-1", + "description": "Update _build_agent_prompt() for architect, integrator, and risk_analyst roles. Replace hardcoded output filenames with dynamically constructed {identifier}-prefixed paths using issue_number/pipeline_id already available in the prompt builder context.", + "acceptance": "Prompts contain e.g. '871-architect-output.json' for issue 871.", + "files": ["orchestrator/routes/pipelines.py"] + }, + { + "id": "TASK-2-2", + "description": "Update _build_checker_prompt(), _build_check_and_fix_prompt(), and _build_autofix_prompt() to replace hardcoded 'implement-results.json' with {identifier}-implement-results.json.", + "acceptance": "Checker and autofix prompts contain e.g. '871-implement-results.json'.", + "files": ["orchestrator/routes/pipelines.py"] + }, + { + "id": "TASK-2-3", + "description": "Update _synthesize_plan_draft_from_outputs() (~line 4335) to construct prefixed filenames. Derive identifier from existing pipeline_mode/issue_number parameters. Fall back to old filenames if prefixed files don't exist.", + "acceptance": "Function reads from {identifier}-architect-output.json first, falls back to architect-output.json.", + "files": ["orchestrator/routes/pipelines.py"] + }, + { + "id": "TASK-2-4", + "description": "Update _read_tester_gaps() (~line 1619) to accept identifier parameter and construct prefixed path with fallback. Update both callers (~line 3139 and ~line 5173) to pass identifier.", + "acceptance": "Function reads from {identifier}-tester-output.json first, falls back to tester-output.json.", + "files": ["orchestrator/routes/pipelines.py"] + }, + { + "id": "TASK-2-5", + "description": "Update all remaining callers of load_agent_output(), save_agent_output(), and collect_handoff_data() WITHIN orchestrator/routes/pipelines.py to pass identifier from pipeline context. Scope: pipelines.py only — dispatch.py and signals.py callers are covered in Phase 1.5.", + "acceptance": "No calls to these functions without identifier in pipelines.py pipeline code paths.", + "files": ["orchestrator/routes/pipelines.py"] + } + ], + "dependencies": ["phase-1.5"] + }, + { + "id": "phase-3", + "name": "Tests", + "goal": "Update all existing tests and add backward-compat fallback tests", + "tasks": [ + { + "id": "TASK-3-1", + "description": "Update test_pipeline_prompts.py — change all assertions that check for 'implement-results.json' in prompt content to expect prefixed filenames. Update TestReadTesterGaps tests.", + "acceptance": "All tests in test_pipeline_prompts.py pass with prefixed filenames.", + "files": ["orchestrator/tests/test_pipeline_prompts.py"] + }, + { + "id": "TASK-3-2", + "description": "Update test_health_check_tester_coverage.py and test_health_check_tier1_advanced.py — update mock file creation to use prefixed filenames.", + "acceptance": "Health check tests pass with prefixed filenames.", + "files": ["orchestrator/tests/test_health_check_tester_coverage.py", "orchestrator/tests/test_health_check_tier1_advanced.py"] + }, + { + "id": "TASK-3-3", + "description": "Update integration_tests/local_pipeline/mock-sandbox/phase-runner.sh — update RESULTS_FILE to use prefixed filename from EGG_ISSUE_NUMBER or EGG_PIPELINE_ID.", + "acceptance": "Integration test writes to {identifier}-implement-results.json.", + "files": ["integration_tests/local_pipeline/mock-sandbox/phase-runner.sh"] + }, + { + "id": "TASK-3-4", + "description": "Add tests for backward-compat fallback in load_agent_output(). Test cases: (1) prefixed path exists, (2) only old global path exists and fallback reads it, (3) neither exists, (4) both exist and prefixed takes priority.", + "acceptance": "All 4 fallback test cases pass.", + "files": ["shared/egg_contracts/tests/test_orchestrator.py"] + }, + { + "id": "TASK-3-5", + "description": "Update orchestrator/tests/test_signals.py — the mock at line 86 patches 'routes.signals.save_agent_output'. After Phase 1.5, handoffs.save_agent_output adds an optional identifier parameter. Update mock setup and assertions to account for the new parameter. ADDED IN REVISION 2 per review feedback (risk analyst R-7).", + "acceptance": "test_signals.py tests pass with updated mock.", + "files": ["orchestrator/tests/test_signals.py"] + } + ], + "dependencies": ["phase-2"] + }, + { + "id": "phase-4", + "name": "Documentation and agent mode commands", + "goal": "Update all documentation and sandbox commands referencing old globally-named filenames", + "tasks": [ + { + "id": "TASK-4-1", + "description": "Update docs/guides/sdlc-pipeline.md — change references to {role}-output.json naming convention.", + "files": ["docs/guides/sdlc-pipeline.md"] + }, + { + "id": "TASK-4-2", + "description": "Update docs/guides/agent-development.md — update agent output file naming tree.", + "files": ["docs/guides/agent-development.md"] + }, + { + "id": "TASK-4-3", + "description": "Update docs/architecture/orchestrator.md — update implement-results.json reference.", + "files": ["docs/architecture/orchestrator.md"] + }, + { + "id": "TASK-4-4", + "description": "Update sandbox/.claude/commands/ agent mode files — replace hardcoded {role}-output.json paths.", + "files": ["sandbox/.claude/commands/coder-mode.md", "sandbox/.claude/commands/tester-mode.md", "sandbox/.claude/commands/integrator-mode.md", "sandbox/.claude/commands/documenter-mode.md"] + }, + { + "id": "TASK-4-5", + "description": "Update docstring in orchestrator/health_checks/tier1/phase_output.py.", + "files": ["orchestrator/health_checks/tier1/phase_output.py"] + } + ], + "dependencies": ["phase-2"] + } + ] + }, + + "files_modified": [ + {"file": "shared/egg_contracts/orchestrator.py", "phase": "1", "change": "Add identifier param to load_agent_output, save_agent_output, collect_handoff_data; add fallback logic"}, + {"file": "orchestrator/handoffs.py", "phase": "1.5", "change": "Add identifier param to save_agent_output, load_agent_output_data, collect_handoff_data, get_handoff_env_var wrappers; forward to shared functions", "added_in": "revision 2"}, + {"file": "orchestrator/dispatch.py", "phase": "1.5", "change": "Pass self.contract_key as identifier at lines 219 and 264", "added_in": "revision 2"}, + {"file": "orchestrator/routes/signals.py", "phase": "1.5", "change": "Pass identifier (from pipeline.issue_number or pipeline_id) at line 193", "added_in": "revision 2"}, + {"file": "orchestrator/routes/pipelines.py", "phase": "2", "change": "Update prompt builders, reader functions, and all callers to use prefixed paths"}, + {"file": "orchestrator/tests/test_pipeline_prompts.py", "phase": "3", "change": "Update prompt assertions and tester gap tests"}, + {"file": "orchestrator/tests/test_health_check_tester_coverage.py", "phase": "3", "change": "Update mock file creation"}, + {"file": "orchestrator/tests/test_health_check_tier1_advanced.py", "phase": "3", "change": "Update mock file creation"}, + {"file": "integration_tests/local_pipeline/mock-sandbox/phase-runner.sh", "phase": "3", "change": "Update RESULTS_FILE to prefixed path"}, + {"file": "shared/egg_contracts/tests/test_orchestrator.py", "phase": "3", "change": "Add backward-compat fallback tests"}, + {"file": "orchestrator/tests/test_signals.py", "phase": "3", "change": "Update save_agent_output mock at line 86 for new identifier parameter", "added_in": "revision 2"}, + {"file": "docs/guides/sdlc-pipeline.md", "phase": "4", "change": "Update naming convention references"}, + {"file": "docs/guides/agent-development.md", "phase": "4", "change": "Update agent output file naming tree"}, + {"file": "docs/architecture/orchestrator.md", "phase": "4", "change": "Update implement-results.json reference"}, + {"file": "sandbox/.claude/commands/coder-mode.md", "phase": "4", "change": "Update output file path"}, + {"file": "sandbox/.claude/commands/tester-mode.md", "phase": "4", "change": "Update output file paths"}, + {"file": "sandbox/.claude/commands/integrator-mode.md", "phase": "4", "change": "Update output file paths"}, + {"file": "sandbox/.claude/commands/documenter-mode.md", "phase": "4", "change": "Update output file paths"}, + {"file": "orchestrator/health_checks/tier1/phase_output.py", "phase": "4", "change": "Update docstring"} + ], + + "no_changes_needed": [ + {"component": "Gateway phase filter (gateway/phase_filter.py)", "reason": "Uses directory-level wildcards — file renames transparent"}, + {"component": "Gateway agent restrictions (gateway/agent_restrictions.py)", "reason": "Uses directory-level allowed_write patterns"}, + {"component": "Readonly mount enforcement (shared/egg_container/__init__.py)", "reason": "agent-outputs/ and checks/ not in _IMPLEMENT_READONLY_DIRS"}, + {"component": "Phase permissions config (.egg/phase-permissions.json)", "reason": "Uses directory-level patterns"}, + {"component": "Container spawner (orchestrator/container_spawner.py)", "reason": "Does not reference specific filenames"}, + {"component": "Health check functional code (phase_output.py:152)", "reason": "Scans directory via iterdir() — works with any filename"} + ], + "risks": [ { "id": "R-1", - "description": "Composite key migration breaks existing pipeline state", - "likelihood": "Medium", - "impact": "High", - "mitigation": "phase_id defaults to None on AgentExecutionModel. OrchestrationState falls back to role-only keying when phase_id is None. Explicit backward-compat tests required." + "title": "Missed wrapper callsites in orchestrator/handoffs.py, dispatch.py, signals.py", + "source": "Risk analyst R-1 (HIGH/HIGH), plan reviewer critical finding", + "likelihood": "HIGH (without mitigation)", + "impact": "HIGH", + "status": "MITIGATED in revision 2 — Phase 1.5 added", + "description": "The original plan updated shared functions but not the wrapper layer. Without updating handoffs.py, dispatch.py, and signals.py, the shared functions receive identifier=None and fall back to global paths, silently defeating the entire fix.", + "mitigation": "Phase 1.5 now explicitly covers all 3 files with dedicated tasks (TASK-1.5-1, TASK-1.5-2, TASK-1.5-3)." }, { "id": "R-2", - "description": "Per-phase prompts leak cross-phase context to coders", - "likelihood": "Low", - "impact": "Medium", - "mitigation": "Build phase-filtered prompt function that only includes current phase's tasks and files_affected. Test that coder prompts don't reference other phases." + "title": "In-flight pipeline backward compatibility during rollout", + "source": "Risk analyst R-2 (MEDIUM/MEDIUM)", + "likelihood": "MEDIUM", + "impact": "MEDIUM", + "status": "MITIGATED by fallback strategy", + "description": "In-flight pipelines may have already written agent outputs to old global paths.", + "mitigation": "Backward-compat fallback in all readers: try new prefixed path first, fall back to old path. Keep permanently (AD-3)." }, { "id": "R-3", - "description": "Integrator write access introduces security risk", - "likelihood": "Low", - "impact": "Medium", - "mitigation": "Conditional on Tier 3 mode. Integrator runs after all agentic reviews and before human review. Defense in depth." + "title": "Agent sandbox commands reference hardcoded global filenames", + "source": "Risk analyst R-3", + "likelihood": "MEDIUM", + "impact": "LOW", + "status": "MITIGATED — Phase 4 updates commands; fallback covers reads in interim" }, { "id": "R-4", - "description": "Sub-branch merge conflicts despite files_affected boundaries (Stage 2)", - "likelihood": "Medium", - "impact": "Medium", - "mitigation": "Sub-branch isolation is primary mechanism. files_affected is safety net. Integrator explicitly tasked with conflict resolution. Conflict detection in integrator prompt." + "title": "Identifier unavailable in local/non-issue pipeline modes", + "source": "Risk analyst R-4", + "likelihood": "LOW", + "impact": "LOW", + "status": "MITIGATED — follow existing _get_draft_path() pattern: issue_number > pipeline_id > 'local'" }, { "id": "R-5", - "description": "Change scope across 12+ files increases merge conflict risk with other PRs", - "likelihood": "Medium", - "impact": "Medium", - "mitigation": "Staged delivery (Stage 1 then Stage 2). Each stage independently mergeable. Coordinate with in-flight orchestration work." + "title": "Integration test hardcoded path in phase-runner.sh", + "source": "Risk analyst R-5", + "likelihood": "HIGH", + "impact": "LOW", + "status": "MITIGATED — TASK-3-3 covers this" }, { "id": "R-6", - "description": "Token cost underestimated due to agentic review retries per phase", - "likelihood": "Low", - "impact": "Low", - "mitigation": "Each phase has max_review_cycles (default: 3). If exhausted, escalates to human review. Total budget bounded: phases * (agents_per_cycle + retry_budget)." + "title": "Prompt-agent path mismatch if agents ignore prompt instructions", + "source": "Risk analyst R-6", + "likelihood": "LOW", + "impact": "MEDIUM", + "status": "MITIGATED — keeping fallback permanently (AD-3)" + }, + { + "id": "R-7", + "title": "Test coverage for wrapper module changes", + "source": "Risk analyst R-7, plan reviewer finding", + "likelihood": "MEDIUM", + "impact": "LOW", + "status": "MITIGATED — TASK-3-5 added in revision 2" } ], - "constraints": [ - "Contract schema changes must be backward-compatible with existing contracts", - "Tier 1 (short-circuit) and Tier 2 (standard multi-agent) must continue working unchanged", - "Gateway branch prefix check already supports sub-branches (no policy change needed)", - "Per-phase worktrees (Stage 2) must not break single-worktree-per-pipeline model", - "Integrator write access conditional on Tier 3 only", - "Plan template and parser must support dependencies field in YAML plan", - "Per-phase review cycles must not require human approval — only agentic review gates" + "backward_compatibility": { + "strategy": "Readers try new prefixed path first, fall back to old global path", + "recommendation": "Keep fallback permanently (AD-3). Cost is one stat() syscall per read. Protects against agent misbehavior and simplifies rollout.", + "applies_to": ["load_agent_output (shared)", "load_agent_output_data (handoffs wrapper)", "_synthesize_plan_draft_from_outputs (pipelines.py)", "_read_tester_gaps (pipelines.py)"] + }, + + "acceptance_criteria": [ + "All .egg-state/ files include issue or pipeline ID in their filename", + "No globally-named files remain that could conflict across concurrent pipelines", + "Existing in-flight pipelines not broken (backward-compatible reader fallback)", + "Wrapper layer (handoffs.py) threads identifier to shared functions", + "All callers in dispatch.py, signals.py, and pipelines.py pass identifier", + "Documentation updated to reflect new naming convention", + "All existing tests updated and passing (including test_signals.py)", + "New tests for backward-compat fallback behavior added", + "Gateway and phase permissions continue working without changes" ], - "open_questions_resolved": { - "Q1_integrator_write_scope": { - "question": "Should the integrator's write access be unrestricted, scoped to modified files, or unrestricted with separate review?", - "recommendation": "Unrestricted within src/, tests/, docs/. The integrator needs to fix integration issues that span beyond individual coders' changed files. Scoping to changed_files is too restrictive for merge conflict resolution. Agentic review of integrator changes is already planned.", - "decision": "TD-4" - }, - "Q2_phase_dependency_storage": { - "question": "Should phase dependencies be in Phase model, separate field, or computed dynamically?", - "recommendation": "Phase model with dependencies: list[str]. Simplest, most natural location.", - "decision": "TD-2" - }, - "Q3_tier_selection_authority": { - "question": "Should Tier 3 require explicit human approval?", - "recommendation": "Same model as Tier 1 — refine agent signals, human overrides during HITL.", - "decision": "TD-3" - }, - "Q4_delivery_sequencing": { - "question": "Sequential-first or full parallel in one pass?", - "recommendation": "Sequential-first (hybrid). Foundation validated before adding parallelism.", - "decision": "TD-5" + "scope": { + "production_code_files": 5, + "test_files": 5, + "integration_test_files": 1, + "documentation_files": 8, + "total_files": 19, + "complexity": "mid", + "note": "Increased from 14 files in revision 1 to 19 files after adding wrapper layer (3 production files + 1 test file + 1 revision of plan doc)" + }, + + "revision_changelog": [ + { + "revision": 2, + "changes": [ + "Added call_chain_analysis section documenting complete I/O flow including wrapper layer", + "Added Phase 1.5 (wrapper layer and caller updates) with 3 tasks covering handoffs.py, dispatch.py, signals.py", + "Added TASK-3-5 for test_signals.py mock update", + "Updated TASK-2-5 scope description to clarify it covers pipelines.py only (dispatch.py and signals.py in Phase 1.5)", + "Updated AD-3 to recommend keeping fallback permanently", + "Added AD-4 for wrapper layer threading decision", + "Updated risks section to incorporate risk analyst R-1 through R-7 findings", + "Added handoffs.py, dispatch.py, signals.py, test_signals.py to files_modified table", + "Updated scope from 14 to 19 files", + "Updated acceptance criteria to include wrapper layer and test_signals.py" + ] } - } + ] } diff --git a/.egg-state/agent-outputs/risk_analyst-output.json b/.egg-state/agent-outputs/risk_analyst-output.json index 4887a8585..e1bd26a1c 100644 --- a/.egg-state/agent-outputs/risk_analyst-output.json +++ b/.egg-state/agent-outputs/risk_analyst-output.json @@ -1,260 +1,250 @@ { - "issue": 887, + "issue": 871, "phase": "plan", "agent": "risk_analyst", - "title": "Risk Assessment: Improve checkpoint discoverability for agents", + "revision": 2, + "revision_reason": "Updated risk assessment after architect revision 2 addressed R-1 (wrapper layer). Incorporates plan review feedback. Identifies new risks R-8 through R-11 not covered in previous revision or architect output.", + "title": "Risk assessment: Namespace .egg-state files per-pipeline (revision 2)", + "overall_risk_level": "LOW", - "overall_risk_level": "Low", - "overall_assessment": "This is a low-risk change. The recommended approach (Option A: prompt-only changes) modifies only text content in prompt templates and markdown files. There are no schema migrations, no new dependencies, no logic changes to Python functions, and no changes to control flow. The changes are purely additive — appending lines to existing prompt sections. The primary risks are around test maintenance and minor merge conflicts, both of which are straightforward to resolve.", + "summary": "The architect's revision 2 correctly addresses the primary R-1 gap (wrapper layer callsites) by adding Phase 1.5. The core approach — prefix filenames with issue/pipeline identifier — remains well-scoped, follows established convention, and requires no gateway or enforcement layer changes. This revision identifies 4 new risks: (1) the plan draft is out-of-sync with the architect's revision 2, (2) two additional hardcoded prompt references to tester-output.json are not covered by any task, (3) two health check test files listed in the plan do not actually need changes, and (4) one additional test file (test_health_check_tier2.py) that references globally-named files is not listed. Overall risk remains LOW — the change is a file naming convention update with a solid backward-compat strategy.", + + "architect_analysis_review": { + "revision_reviewed": 2, + "accuracy": "Accurate — revision 2 addresses all findings from revision 1", + "improvements_in_revision_2": [ + "Phase 1.5 added for wrapper layer (handoffs.py, dispatch.py, signals.py) — directly addresses R-1", + "TASK-3-5 added for test_signals.py mock update — addresses R-7", + "AD-3 updated to keep fallback permanently — aligns with R-6 recommendation", + "AD-4 added for wrapper layer threading decision", + "Call chain analysis section added with complete 5-layer breakdown", + "Risk section now incorporates all R-1 through R-7 findings with mitigation status" + ], + "remaining_gaps": [ + { + "gap": "Two hardcoded tester-output.json references in coder prompt builder at pipelines.py:2196 and 2790 not covered by any task", + "detail": "TASK-2-1 covers architect, integrator, risk_analyst prompt builders. These two references are in the coder prompt builder (revision checklist sections) and tell coder agents to check .egg-state/agent-outputs/tester-output.json directly. After the change, the tester writes to {id}-tester-output.json, so the coder won't find it at the hardcoded path.", + "severity": "LOW — tester gap data is injected programmatically into the coder prompt by _read_tester_gaps(), so the agent has the information regardless. The hardcoded path is a convenience reference." + }, + { + "gap": "Plan draft (871-plan.md) does not include Phase 1.5 or wrapper layer files", + "detail": "The plan draft is still on revision 1. The architect produced a revision 2 JSON with the corrected plan, but 871-plan.md was not regenerated. If the plan draft drives SDLC task creation, the wrapper layer work will be missing.", + "severity": "HIGH — this could result in the same silent failure that R-1 described" + }, + { + "gap": "Test files test_health_check_tier1_advanced.py and test_health_check_tester_coverage.py listed for changes but don't actually need them", + "detail": "Both tests create architect-output.json in .egg-state/drafts/ (not agent-outputs/). The health check _check_plan_outputs() scans drafts/ via iterdir() matching 'plan' or 'architect' in filenames. Since the change only namespaces files in agent-outputs/ and checks/, these tests are unaffected. The architect's plan incorrectly lists them as needing updates.", + "severity": "LOW — unnecessary changes, but modifications to the file-creation filename would still pass the health check since 'architect' would still be in the name" + } + ], + "correct_claims_verified": [ + "Gateway uses directory-level wildcards — no changes needed (verified: gateway/phase_filter.py:495 uses '.egg-state/agent-outputs/*' and '.egg-state/checks/*')", + "Readonly mounts don't cover agent-outputs/ or checks/ (verified: _IMPLEMENT_READONLY_DIRS only has drafts, contracts, pipelines, reviews)", + "Health check _check_plan_outputs() scans drafts/ via iterdir() — filename-agnostic (verified: phase_output.py:152-155)", + "Health context _read_agent_outputs() scans agent-outputs/ via iterdir() — filename-agnostic (verified: context.py:192)", + "dispatch.py has self.contract_key property returning pipeline.issue_number or pipeline.id (verified: dispatch.py:118-126)", + "signals.py has pipeline object available from store.load_pipeline() at line 162 with issue_number attribute (verified)", + "pipelines.py does NOT import load_agent_output/save_agent_output/collect_handoff_data — it constructs paths inline in prompt builders and reader functions (verified: only a comment reference at line 3722)" + ] + }, "risks": [ { "id": "R-1", - "title": "Existing prompt tests may fail after adding checkpoint hints", - "category": "compatibility", - "description": "The test file orchestrator/tests/test_pipeline_prompts.py (2029 lines) has extensive test classes covering all three target functions: TestBuildRoleContext (lines 837-961), TestBuildAgentPromptRoleContext (lines 963-1075), and TestBuildPhaseScopedPromptOverview (lines 1077+). Several tests assert on the presence or absence of specific sections (e.g., '## For More Context' in result). Adding new lines to these sections could break tests that assert on exact content or ordering. Notably, test_context_pointers_always_present_for_execution_roles (line 949) asserts on 'For More Context' — this should still pass since we're adding to that section, not removing it. However, edge-case tests in TestBuildRoleContextEdgeCases (line 1284) and TestBuildAgentPromptEdgeCases (line 1430) may be more fragile.", - "likelihood": "Medium", - "impact": "Low", - "risk_score": "Low", - "affected_files": [ - "orchestrator/tests/test_pipeline_prompts.py" - ], - "mitigation": { - "strategy": "Run tests before and after, update assertions as needed", - "steps": [ - "Run pytest orchestrator/tests/test_pipeline_prompts.py before making any changes to establish a baseline", - "After adding checkpoint hints, re-run the same test file and inspect any failures", - "Update tests that assert on exact prompt content to include the new checkpoint lines", - "Add new test assertions verifying checkpoint hints appear for the correct roles" - ], - "residual_risk": "Negligible — additive text changes cause predictable test failures that are easy to fix" - }, - "human_review_required": false + "title": "Missed wrapper callsites in orchestrator/handoffs.py, dispatch.py, signals.py", + "category": "Completeness", + "likelihood": "HIGH (without mitigation)", + "impact": "HIGH", + "risk_score": "HIGH (MITIGATED in architect revision 2)", + "status": "MITIGATED", + "description": "The original plan updated shared/egg_contracts/orchestrator.py but not the wrapper layer. Without Phase 1.5, shared functions receive identifier=None from these code paths and fall back to global paths, silently defeating the entire fix.", + "mitigation_applied": "Architect revision 2 added Phase 1.5 with TASK-1.5-1 (handoffs.py), TASK-1.5-2 (dispatch.py), TASK-1.5-3 (signals.py). All three files and their specific callsites are now covered.", + "residual_risk": "LOW — plan draft (871-plan.md) has not been updated to include Phase 1.5. See R-8." }, { "id": "R-2", - "title": "Merge conflict with PR #893 (move agent rules to ~/.claude/CLAUDE.md)", - "category": "compatibility", - "description": "Open PR #893 modifies sandbox/.claude/rules/README.md, sandbox/entrypoint.py, and tests/sandbox/test_entrypoint.py. Our changes target sandbox/.claude/commands/{tester,integrator,documenter,coder}-mode.md and sandbox/.claude/rules/mission.md. There is no file-level overlap — PR #893 does not touch any of the mode command files or mission.md. However, if a follow-up PR restructures the sandbox/.claude/ directory layout, there could be indirect conflicts.", - "likelihood": "Low", - "impact": "Low", - "risk_score": "Low", - "affected_files": [ - "sandbox/.claude/commands/tester-mode.md", - "sandbox/.claude/commands/integrator-mode.md", - "sandbox/.claude/commands/documenter-mode.md", - "sandbox/.claude/commands/coder-mode.md", - "sandbox/.claude/rules/mission.md" - ], - "mitigation": { - "strategy": "Check PR #893 status before implementation", - "steps": [ - "Check if PR #893 is merged before starting implementation — if so, rebase onto main", - "Files are disjoint so git merge should handle concurrent landing cleanly", - "No proactive action needed" - ], - "residual_risk": "Negligible — no file overlap" - }, - "human_review_required": false + "title": "In-flight pipeline backward compatibility during rollout", + "category": "Compatibility", + "likelihood": "MEDIUM", + "impact": "MEDIUM", + "risk_score": "MEDIUM (MITIGATED)", + "status": "MITIGATED", + "description": "In-flight pipelines may have already written agent outputs to old global paths. Readers updated to look for prefixed paths without fallback would fail to find dependency outputs.", + "mitigation_applied": "Backward-compat fallback in all readers: try new prefixed path first, fall back to old path. AD-3 decides to keep fallback permanently.", + "residual_risk": "NEGLIGIBLE" }, { "id": "R-3", - "title": "Prompt token budget increase may degrade agent performance on context-heavy sessions", - "category": "performance", - "description": "Adding checkpoint hints increases prompt size for every downstream agent session. Estimated additions: ~2-3 lines in _build_role_context() (shared across all execution roles), ~2 lines per role in _build_agent_prompt() (tester, documenter, integrator), ~1 line in _build_phase_scoped_prompt() revision checklist. Total: approximately 10-15 lines of additional prompt text per session. The checkpoint.md rule (62 lines) is already loaded into every session. Combined, checkpoint-related content will be ~75 lines per session. For context-heavy sessions (large issues, many files), this adds marginal token pressure.", - "likelihood": "Low", - "impact": "Low", - "risk_score": "Low", - "affected_files": [ - "orchestrator/routes/pipelines.py" - ], - "mitigation": { - "strategy": "Keep hints concise, reference existing rule for details", - "steps": [ - "Limit each injection point to 1-2 lines maximum", - "Reference the checkpoint.md rule for full CLI docs rather than duplicating workflow blocks", - "The architect's TD-2 decision (brief nudge + role-specific one-liner) is the correct approach" - ], - "residual_risk": "Negligible — ~10-15 lines is a trivial percentage of typical agent prompt budgets" - }, - "human_review_required": false + "title": "Agent sandbox commands reference hardcoded global filenames", + "category": "Compatibility", + "likelihood": "MEDIUM", + "impact": "LOW", + "risk_score": "LOW (MITIGATED)", + "status": "MITIGATED", + "description": "Sandbox agent mode commands (coder-mode.md, tester-mode.md, integrator-mode.md, documenter-mode.md) contain hardcoded file paths. Used only in manual/interactive agent mode.", + "mitigation_applied": "Phase 4 tasks update all sandbox commands. Backward-compat fallback covers reads during transition.", + "residual_risk": "LOW — sandbox commands may use shell variable interpolation which must be verified to work in the template rendering context" }, { "id": "R-4", - "title": "Agents may waste tokens on unnecessary checkpoint queries", - "category": "performance", - "description": "If prompt hints are too directive (e.g., 'You MUST review checkpoints before proceeding'), agents may spend tokens running egg-checkpoint commands even when no useful checkpoints exist (e.g., first-cycle coders, new pipelines with no prior sessions). Each checkpoint query adds tool-call overhead (~500-1000 tokens for command + result parsing).", - "likelihood": "Low", - "impact": "Low", - "risk_score": "Low", - "affected_files": [ - "orchestrator/routes/pipelines.py", - "sandbox/.claude/commands/tester-mode.md", - "sandbox/.claude/commands/integrator-mode.md", - "sandbox/.claude/commands/documenter-mode.md" - ], - "mitigation": { - "strategy": "Use suggestive language, not imperative", - "steps": [ - "Phrase hints as optional context gathering: 'For additional context, you can review...' not 'You must review...'", - "The coder-mode.md hint should be conditional on revision cycles only (per architect TD-7)", - "First-cycle agents seeing an empty checkpoint result is cheap (~100 tokens) and self-correcting — agents learn to skip irrelevant queries" - ], - "residual_risk": "Negligible — even worst case, a few unnecessary checkpoint queries cost <2000 tokens per session" - }, - "human_review_required": false + "title": "Identifier unavailable in local/non-issue pipeline modes", + "category": "Edge Case", + "likelihood": "LOW", + "impact": "LOW", + "risk_score": "LOW (MITIGATED)", + "status": "MITIGATED", + "description": "In local pipeline mode without an issue number, identifier must be derived from pipeline_id. If both are None, a sensible fallback is needed.", + "mitigation_applied": "Follow existing _get_draft_path() pattern: issue_number > pipeline_id > 'local'. Already proposed in AD-4.", + "residual_risk": "NEGLIGIBLE" }, { "id": "R-5", - "title": "egg-checkpoint CLI failures or empty results could confuse agents", - "category": "reliability", - "description": "The egg-checkpoint CLI queries the egg/checkpoints/v2 branch via git. If that branch doesn't exist (new repos), if the checkpoint store is empty, or if the CLI encounters git errors, agents will see error or empty output. Agents unfamiliar with the tool could waste time debugging checkpoint access issues rather than doing their primary task.", - "likelihood": "Low", - "impact": "Low", - "risk_score": "Low", - "affected_files": [], - "mitigation": { - "strategy": "No special handling needed — this is a pre-existing condition", - "steps": [ - "egg-checkpoint already handles 'no results' gracefully (returns empty list, not an error)", - "Hints should not frame checkpoint review as a prerequisite — agents should proceed with their primary task regardless", - "The existing checkpoint.md rule documents the CLI behavior" - ], - "residual_risk": "Negligible — pre-existing condition not introduced by our changes" - }, - "human_review_required": false + "title": "Integration test hardcoded path in phase-runner.sh", + "category": "Test Infrastructure", + "likelihood": "HIGH", + "impact": "LOW", + "risk_score": "LOW (MITIGATED)", + "status": "MITIGATED", + "description": "phase-runner.sh:143 hardcodes RESULTS_FILE with implement-results.json. After the change, prompts tell agents to write to a prefixed filename, but the mock still uses the old name.", + "mitigation_applied": "TASK-3-3 covers updating phase-runner.sh to use ${EGG_ISSUE_NUMBER:-local}-implement-results.json.", + "residual_risk": "NEGLIGIBLE" }, { "id": "R-6", - "title": "Mode command changes have lower reach than orchestrator prompt changes", - "category": "effectiveness", - "description": "Agent mode commands (sandbox/.claude/commands/*.md) are slash commands that agents invoke manually. In pipeline mode, agents receive orchestrator-generated prompts automatically and may not invoke slash commands. If only mode commands were implemented (without orchestrator prompt changes), the impact would be minimal since pipeline agents typically do not activate mode commands. This risk is about implementation ordering, not a failure mode.", - "likelihood": "Medium", - "impact": "Medium", - "risk_score": "Low-Medium", - "affected_files": [ - "sandbox/.claude/commands/tester-mode.md", - "sandbox/.claude/commands/integrator-mode.md", - "sandbox/.claude/commands/documenter-mode.md", - "sandbox/.claude/commands/coder-mode.md" - ], - "mitigation": { - "strategy": "Prioritize orchestrator prompt injection (Phase 1)", - "steps": [ - "The architect correctly identifies orchestrator prompt injection (Phase 1) as the highest-leverage change — it must be implemented", - "Mode command updates (Phase 2) are supplementary and reinforce the orchestrator hints", - "The coder should implement Phase 1 first, then Phase 2, then Phase 3", - "If the implementation budget is tight, Phase 1 alone delivers most of the value" - ], - "residual_risk": "Low — with Phase 1 implemented, checkpoint awareness reaches all pipeline agents automatically" - }, - "human_review_required": false + "title": "Prompt-agent path mismatch if agents ignore prompt instructions", + "category": "Robustness", + "likelihood": "LOW", + "impact": "MEDIUM", + "risk_score": "LOW (MITIGATED)", + "status": "MITIGATED", + "description": "Agent output file paths are communicated via prompt instructions. If an agent writes to a different path, readers won't find the output at the expected prefixed path.", + "mitigation_applied": "AD-3 keeps backward-compat fallback permanently. This handles both transition and misbehaving agents.", + "residual_risk": "NEGLIGIBLE" }, { "id": "R-7", - "title": "Deferred handoff enrichment creates a known but acceptable gap", - "category": "completeness", - "description": "The architect deferred items 5-8 from the issue (slash command, handoff enrichment, revision summaries, egg-agent-context wrapper). The most notable deferral is checkpoint_ids in handoff data (item 6). Without this, agents query checkpoints by pipeline ID or issue number rather than receiving exact checkpoint IDs in their handoff data. This means agents could theoretically see stale checkpoints from prior pipeline runs for the same issue.", - "likelihood": "Medium", - "impact": "Low", - "risk_score": "Low", - "affected_files": [], - "mitigation": { - "strategy": "Accept deferral — query-based discovery is sufficient", - "steps": [ - "Querying by $EGG_PIPELINE_ID scopes results to the current pipeline run, which is sufficient for the primary use case", - "The architect's TD-5 correctly identifies the race condition risk with checkpoint write timing — handoff enrichment should not be attempted until that is understood", - "A follow-up issue should be filed to track handoff enrichment as future work", - "The coder should NOT attempt to implement handoff enrichment in this PR" - ], - "residual_risk": "Low — query-based discovery is reliable for the primary use cases" - }, - "human_review_required": false + "title": "Test coverage for wrapper module changes (test_signals.py)", + "category": "Test Infrastructure", + "likelihood": "MEDIUM", + "impact": "LOW", + "risk_score": "LOW (MITIGATED)", + "status": "MITIGATED", + "description": "test_signals.py:86 mocks save_agent_output. If the function signature changes, the mock needs updating.", + "mitigation_applied": "TASK-3-5 added in architect revision 2 to update test_signals.py.", + "residual_risk": "NEGLIGIBLE" }, { "id": "R-8", - "title": "No security risks identified", - "category": "security", - "description": "The changes are limited to prompt text and markdown content. No new user inputs are processed, no new API endpoints are added, no authentication or authorization logic is modified, and no new dependencies are introduced. The checkpoint CLI already exists and is accessible to all agents — we are only making agents aware of it. There is no privilege escalation or information disclosure risk since agents already have access to all checkpoints in their pipeline.", - "likelihood": "N/A", - "impact": "N/A", - "risk_score": "None", - "affected_files": [], - "mitigation": { - "strategy": "None needed", - "steps": [] - }, - "human_review_required": false - } - ], - - "areas_requiring_human_review": [ + "title": "Plan draft (871-plan.md) does not reflect architect revision 2", + "category": "Process / Completeness", + "likelihood": "HIGH", + "impact": "HIGH", + "risk_score": "HIGH", + "status": "OPEN", + "description": "The plan draft at .egg-state/drafts/871-plan.md is still on revision 1. It does not include Phase 1.5 (wrapper layer) or the wrapper layer files (orchestrator/handoffs.py, orchestrator/dispatch.py, orchestrator/routes/signals.py). It also does not include TASK-3-5 (test_signals.py). If the plan draft is what gets turned into SDLC contract tasks, the wrapper layer work will be omitted — reproducing the exact problem described in R-1.", + "affected_artifacts": [ + ".egg-state/drafts/871-plan.md (Phases section, Files Modified table, yaml-tasks block)" + ], + "mitigation": "The plan draft must be regenerated or manually updated to include: (1) Phase 1.5 with TASK-1.5-1, TASK-1.5-2, TASK-1.5-3, (2) TASK-3-5 in Phase 3, (3) handoffs.py, dispatch.py, signals.py, test_signals.py in Files Modified table. The architect's revision 2 JSON has the correct content — the plan draft needs to match it.", + "rollback_plan": "N/A — this is a process fix, not a code change." + }, { - "area": "Prompt wording review", - "reason": "The exact phrasing of checkpoint hints in orchestrator prompts directly affects agent behavior. Overly directive language causes token waste on unnecessary queries; overly vague language gets ignored. The human reviewer should evaluate whether the suggested commands and framing strike the right balance between discoverability and non-intrusiveness. This is a judgment call best informed by observing agent behavior in production.", - "files": ["orchestrator/routes/pipelines.py"], - "decision_needed": "Approve the specific wording of checkpoint hints before merge" + "id": "R-9", + "title": "Two additional hardcoded tester-output.json references in coder prompt builder", + "category": "Completeness", + "likelihood": "MEDIUM", + "impact": "LOW", + "risk_score": "LOW", + "status": "OPEN", + "description": "orchestrator/routes/pipelines.py has two hardcoded references to tester-output.json in the coder prompt builder that are NOT covered by any task: (1) line 2196 in a revision instruction telling coder to 'Check .egg-state/agent-outputs/tester-output.json for test failures and gaps', (2) line 2790 in a revision checklist with the same instruction. TASK-2-1 only covers architect, integrator, and risk_analyst prompt builders. These references are in the coder prompt builder.", + "affected_files": [ + "orchestrator/routes/pipelines.py:2196 (coder revision instruction)", + "orchestrator/routes/pipelines.py:2790 (coder revision checklist)" + ], + "mitigation": "Expand TASK-2-1 scope to include ALL prompt builders that reference agent output filenames, not just architect/integrator/risk_analyst. Or create a dedicated sweep task. Impact is LOW because tester gap data is injected programmatically into the coder prompt by _read_tester_gaps() — the hardcoded path is a convenience reference. With the permanent fallback (AD-3), the agent would get a 'file not found' but the data is already in the prompt.", + "rollback_plan": "N/A — low severity." }, { - "area": "Test coverage for new prompt content", - "reason": "The existing test suite (test_pipeline_prompts.py, 2029 lines) is comprehensive but was written before checkpoint hints existed. The coder should add test assertions for the new checkpoint-related content in each affected function. The human reviewer should verify these tests exist and are meaningful (not just 'assert checkpoint in result').", - "files": ["orchestrator/tests/test_pipeline_prompts.py"], - "decision_needed": "Verify adequate test coverage in PR review" + "id": "R-10", + "title": "Plan incorrectly lists two health check test files as needing updates", + "category": "Implementation Accuracy", + "likelihood": "LOW", + "impact": "LOW", + "risk_score": "LOW", + "status": "OPEN", + "description": "TASK-3-2 lists test_health_check_tester_coverage.py and test_health_check_tier1_advanced.py as needing updates for 'mock file creation'. Both tests create architect-output.json in .egg-state/DRAFTS/ (not agent-outputs/). The health check _check_plan_outputs() scans the drafts/ directory via iterdir() matching files with 'plan' or 'architect' in the name (phase_output.py:155). Since the change only namespaces files in agent-outputs/ and checks/, these test files are NOT affected.", + "evidence": [ + "test_health_check_tier1_advanced.py:434 creates file at Path(tmpdir) / '.egg-state' / 'drafts' / 'architect-output.json'", + "test_health_check_tester_coverage.py:235-237 creates file at Path(tmp) / 'repo' / '.egg-state' / 'drafts' / 'architect-output.json'", + "phase_output.py:152-156 _check_plan_outputs() scans drafts_dir.iterdir() for files with 'plan' or 'architect' in name" + ], + "mitigation": "Remove test_health_check_tier1_advanced.py and test_health_check_tester_coverage.py from TASK-3-2 scope. These tests verify health check behavior against the drafts/ directory, which is unrelated to the agent-outputs/ namespacing change. Making unnecessary changes risks introducing errors.", + "rollback_plan": "N/A — the tests don't need changing." }, { - "area": "Deferred scope acknowledgment", - "reason": "The issue lists 8 items but the implementation covers only items 1-4. The architect's justification for deferring items 5-8 is technically sound (TD-5, TD-6), but the issue author may have had different prioritization. The human reviewer should confirm they are comfortable with the deferred scope and whether follow-up issues should be filed.", - "files": [], - "decision_needed": "Confirm items 5-8 can be deferred to follow-up issues" + "id": "R-11", + "title": "test_health_check_tier2.py references globally-named agent output file", + "category": "Test Infrastructure", + "likelihood": "LOW", + "impact": "LOW", + "risk_score": "LOW", + "status": "OPEN (no action required)", + "description": "orchestrator/tests/test_health_check_tier2.py:239 creates coder-output.json in agent-outputs/ and asserts at line 254 that 'coder-output.json' is a key in ctx.agent_outputs. The health context reads via iterdir() (context.py:192), so it's filename-agnostic and the test will continue to pass regardless of the naming change. This file is not listed in the architect's files_modified table. The test does NOT strictly need updating since it creates its own fixture data with an arbitrary name.", + "affected_files": [ + "orchestrator/tests/test_health_check_tier2.py:239,254-255" + ], + "mitigation": "No action required. The test creates its own fixture data and verifies that the health context can read from agent-outputs/ — it doesn't test the naming convention. Including it in the plan would be over-scoping.", + "rollback_plan": "N/A" } ], - "rollback_plan": { - "strategy": "Simple git revert", - "description": "All changes are additive text in prompt templates and markdown files. A single git revert of the implementation commit(s) fully restores the previous behavior. No database migrations, no schema changes, no configuration changes, and no dependency changes require unwinding.", - "steps": [ - "1. Identify the merge commit(s) from the PR", - "2. git revert ", - "3. Push the revert and verify agent prompts no longer contain checkpoint hints", - "4. No downstream cleanup needed — agents simply won't see checkpoint hints in subsequent sessions" - ], - "data_impact": "None. No data is created, modified, or migrated by these changes.", - "service_impact": "None. The orchestrator does not need to be restarted — prompt changes take effect on the next agent session." + "security_assessment": { + "risk_level": "NONE", + "details": "This change is purely a file naming convention update within the .egg-state directory. No new input validation, authentication, authorization, or data handling code is introduced. The identifier used in path construction comes from issue_number (integer) or pipeline_id (server-generated string), both of which are trusted internal values. Gateway and phase filter enforcement layers use directory-level wildcards that are transparent to file renames. No path traversal risk since the identifier is an integer or a controlled string, and paths are constructed via Path() object joining, not string interpolation." }, - "risk_matrix_summary": { - "high_risks": 0, - "medium_risks": 0, - "low_risks": 7, - "no_risk": 1, - "recommendation": "Proceed with implementation. All identified risks are low severity. The medium-likelihood items (R-1: test breakage, R-6: mode command reach, R-7: deferred scope gap) all have low impact and straightforward mitigations. No risks warrant blocking the implementation or escalating to a human decision before proceeding." + "performance_assessment": { + "risk_level": "NEGLIGIBLE", + "details": "The backward-compat fallback adds one additional file existence check (Path.exists() -> one stat() syscall) per read when the new prefixed file doesn't exist. This is negligible. No new I/O patterns, database queries, or network calls are introduced." }, - "architect_assessment_review": { - "agreement": [ - "Option A (prompt-only changes) is the correct approach — lowest risk with highest leverage", - "Orchestrator prompt injection is correctly identified as the highest-leverage change point", - "Brief nudge + role-specific one-liner (TD-2) avoids duplicating the checkpoint.md rule content", - "Deferring handoff enrichment (TD-5) is correct given checkpoint write timing uncertainty", - "Excluding egg-agent-context wrapper and slash command (TD-6) avoids over-engineering", - "Including coder-mode.md with revision-cycle-only hint (TD-7) adds value without noise" - ], - "concerns": [ - "No significant concerns with the architect's analysis. The approach is well-scoped and the risks identified in the architect output (R-1 through R-4) align with this assessment." + "rollback_plan": { + "strategy": "Git revert of the PR", + "steps": [ + "Revert the PR on main", + "Since all shared functions default to identifier=None (backward compat), reverting writers restores old global paths immediately", + "Any in-flight pipelines on the new code that already wrote prefixed files: readers revert to old-path-only behavior and prefixed files are orphaned (harmless)", + "Clean approach: revert the entire PR. In-flight pipelines may need manual restart but this is standard pipeline failure recovery" ], - "additional_risks_identified": [ - "R-5 (checkpoint CLI failures confusing agents) — minor, pre-existing condition", - "R-6 (mode command reach in pipeline mode) — design consideration about implementation ordering, not a new risk" - ] + "estimated_blast_radius": "Low — only affects .egg-state file I/O within the SDLC pipeline. No production code, gateway enforcement, or user-facing behavior is impacted." }, - "recommendations_for_coder": [ - "Prioritize Phase 1 (orchestrator prompt injection in pipelines.py) — it is the highest-leverage change and the most likely to need test updates", - "Run pytest orchestrator/tests/test_pipeline_prompts.py before and after changes to catch test breakage early", - "Keep checkpoint hints to 1-2 lines per injection point — reference the checkpoint.md rule for details rather than duplicating workflow blocks", - "Use suggestive language ('For additional context, review...') not imperative ('You must review...')", - "Add new test assertions for checkpoint content in the affected test classes (TestBuildRoleContext, TestBuildAgentPromptRoleContext, TestBuildPhaseScopedPromptOverview)", - "Do NOT implement handoff enrichment (checkpoint_ids in AgentOutput) — that is explicitly deferred per architect TD-5", - "Check the status of PR #893 before starting — if merged, rebase onto main" - ] + "areas_requiring_human_review": [ + { + "area": "Plan draft synchronization with architect revision 2 (R-8)", + "priority": "HIGH", + "reason": "The plan draft (871-plan.md) must be updated to include Phase 1.5 (wrapper layer) before it becomes SDLC tasks. The architect's JSON output has the correct content, but the markdown plan has not been regenerated. Without this update, the implementation will miss 3 critical files (handoffs.py, dispatch.py, signals.py) and 1 test file (test_signals.py)." + }, + { + "area": "Coder prompt tester-output.json references (R-9)", + "priority": "LOW", + "reason": "Two hardcoded references in coder prompt builder at pipelines.py:2196 and 2790 are not covered by any task. Impact is low because tester gap data is injected programmatically. Reviewer should decide whether to expand TASK-2-1 scope or accept the minor discrepancy." + }, + { + "area": "Removal of two test files from TASK-3-2 scope (R-10)", + "priority": "LOW", + "reason": "test_health_check_tier1_advanced.py and test_health_check_tester_coverage.py create files in drafts/, not agent-outputs/. They don't need updating. Reviewer should confirm the health check only scans drafts/ (verified at phase_output.py:149-155)." + } + ], + + "recommendation": { + "proceed": true, + "overall_assessment": "The proposed change is well-designed, follows established conventions, and has a solid backward-compat strategy. The architect's revision 2 successfully addresses all findings from revision 1. The remaining risks are LOW severity with one HIGH process risk (R-8 — plan draft sync) that must be addressed before implementation begins.", + "conditions": [ + "CRITICAL: Update 871-plan.md to include Phase 1.5 (wrapper layer) and TASK-3-5 (test_signals.py) before generating SDLC tasks (R-8)", + "Consider expanding TASK-2-1 scope to cover coder prompt builder tester-output.json references at lines 2196 and 2790 (R-9)", + "Remove test_health_check_tier1_advanced.py and test_health_check_tester_coverage.py from TASK-3-2 scope — they create files in drafts/, not agent-outputs/ (R-10)", + "Verify TASK-2-5 acceptance criterion — pipelines.py doesn't call load_agent_output/save_agent_output/collect_handoff_data directly (verified: no imports found). TASK-2-5 should be reworded to cover hardcoded path strings and inline Path constructions, not function calls" + ] + } } diff --git a/.egg-state/checks/implement-results.json b/.egg-state/checks/implement-results.json index fdbb03489..21fb6ab9f 100644 --- a/.egg-state/checks/implement-results.json +++ b/.egg-state/checks/implement-results.json @@ -4,12 +4,12 @@ { "name": "lint", "passed": true, - "output": "==> Ruff check...\nAll checks passed!\n==> Ruff format check...\n429 files already formatted\n==> Mypy...\nSuccess: no issues found in 118 source files\n==> Shellcheck...\n==> Custom checks... all passed" + "output": "==> Ruff check...\nAll checks passed!\n==> Ruff format check...\n430 files already formatted\n==> Mypy...\nSuccess: no issues found in 118 source files\n==> Shellcheck...\n==> Yamllint...\nSKIP: yamllint not installed\n==> Hadolint...\nSKIP: hadolint not installed\n==> Actionlint...\nSKIP: actionlint not installed\n==> Custom checks...\n bin-symlinks...\nOK: All 2 bin/ symlinks are valid\n claude-imports...\nWarning: host-services directory not found\n container-host-boundary...\nOK: No forbidden host-services imports found in sandbox\n container-paths...\nOK: No problematic sys.path patterns found\n docker-and-claude-invocations...\nOK: No docker/claude invocation violations found\n gh-cli-usage...\nWarning: host-services directory not found\n hardcoded-ports...\nOK: No hardcoded port numbers found\n llm-api-calls...\nOK: No direct LLM API usage found outside sandbox\n model-versions...\nOK: No non-alias Claude model references found\n reviewer-job-names...\nOK: All reviewer jobs use the required naming prefix\n workflow-secrets...\nOK: No untrusted script execution with secrets found in workflows" }, { "name": "test", "passed": true, - "output": "=========== 6685 passed, 85 skipped, 4 warnings in 73.80s (0:01:13) ============\n\nFixed: 2 docker_client test failures caused by sys.modules mock pollution from test_checkpoint_discovery.py. Added early docker import in orchestrator/tests/conftest.py to ensure real docker module is in sys.modules before test collection." + "output": "==> Running unit tests...\npytest tests/ gateway/tests/ orchestrator/tests/ -v\n6692 passed, 85 skipped, 4 warnings in 73.24s (0:01:13)" }, { "name": "security", diff --git a/.egg-state/contracts/871.json b/.egg-state/contracts/871.json new file mode 100644 index 000000000..69fa8eca1 --- /dev/null +++ b/.egg-state/contracts/871.json @@ -0,0 +1,488 @@ +{ + "schemaVersion": "1.0", + "issue": { + "number": 871, + "title": "Issue #871", + "url": "https://github.com/jwbron/egg/issues/871" + }, + "pipeline_id": null, + "current_phase": "refine", + "acceptance_criteria": [], + "phases": [ + { + "id": "phase-1", + "name": "Core path construction", + "status": "pending", + "review_cycles": 0, + "max_cycles": 3, + "escalated": false, + "escalation_reason": null, + "tasks": [ + { + "id": "task-1-1", + "description": "Add optional identifier parameter (int | str | None) to load_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, construct path as {identifier}-{role.value}-output.json. Add fallback \u2014 if prefixed path does not exist, try old {role.value}-output.json path. When identifier is None, use old path directly (backward compat).", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "load_agent_output(repo, role, identifier=871) reads from 871-{role}-output.json, falling back to {role}-output.json. load_agent_output(repo, role) reads from {role}-output.json (unchanged behavior).", + "files_affected": [ + "shared/egg_contracts/orchestrator.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-1-2", + "description": "Add optional identifier parameter to save_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, write to {identifier}-{role.value}-output.json. When None, write to {role.value}-output.json (backward compat).", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "save_agent_output(repo, role, data, identifier=871) writes to 871-{role}-output.json. save_agent_output(repo, role, data) writes to {role}-output.json.", + "files_affected": [ + "shared/egg_contracts/orchestrator.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-1-3", + "description": "Add optional identifier parameter to collect_handoff_data() in shared/egg_contracts/orchestrator.py and pass it through to load_agent_output() for each dependency.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "collect_handoff_data(repo, role, identifier=871) reads dependency outputs from prefixed paths with fallback.", + "files_affected": [ + "shared/egg_contracts/orchestrator.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + } + ], + "dependencies": [], + "review_feedback": [] + }, + { + "id": "phase-2", + "name": "Orchestrator wrapper and caller updates", + "status": "pending", + "review_cycles": 0, + "max_cycles": 3, + "escalated": false, + "escalation_reason": null, + "tasks": [ + { + "id": "task-2-1", + "description": "Add optional identifier parameter to all wrapper functions in orchestrator/handoffs.py \u2014 save_agent_output() (line 152), load_agent_output_data() (line 171), collect_handoff_data() (line 193), and get_handoff_env_var() (line 227). Each wrapper must accept identifier and forward it to the underlying shared library function.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "All four wrapper functions accept an optional identifier parameter and pass it through. Calling save_agent_output(repo, output, identifier=871) results in the shared save_agent_output receiving identifier=871.", + "files_affected": [ + "orchestrator/handoffs.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-2-2", + "description": "Update orchestrator/dispatch.py \u2014 PipelineDispatcher.complete_agent() (line 219) must pass self.contract_key as identifier to save_agent_output(). PipelineDispatcher.get_handoff_data() (line 264) must pass self.contract_key as identifier to collect_handoff_data(). The contract_key property (line 118) already returns issue_number for issue-mode or pipeline_id for local-mode.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "PipelineDispatcher.complete_agent() calls save_agent_output(self.repo_path, contract_role, outputs, identifier=self.contract_key). PipelineDispatcher.get_handoff_data() calls collect_handoff_data(self.repo_path, contract_role, identifier=self.contract_key). Verified by grep showing no calls without identifier.", + "files_affected": [ + "orchestrator/dispatch.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-2-3", + "description": "Update orchestrator/routes/signals.py \u2014 handle_complete_signal() (line 193) must pass the pipeline identifier to save_agent_output(). Derive identifier from the pipeline object (pipeline.issue_number for issue-mode, pipeline_id for local-mode).", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "save_agent_output at line 193 receives identifier. Verified by grep showing no calls without identifier.", + "files_affected": [ + "orchestrator/routes/signals.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + } + ], + "dependencies": [], + "review_feedback": [] + }, + { + "id": "phase-3", + "name": "Prompt builder and reader updates", + "status": "pending", + "review_cycles": 0, + "max_cycles": 3, + "escalated": false, + "escalation_reason": null, + "tasks": [ + { + "id": "task-3-1", + "description": "Update _build_agent_prompt() in orchestrator/routes/pipelines.py for architect, integrator, and risk_analyst roles. Replace hardcoded output filenames (e.g., \"architect-output.json\") with dynamically constructed {identifier}-prefixed paths using the issue_number/pipeline_id already available in the prompt builder context. Affects lines ~2509, ~2524, ~2590.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Prompts contain e.g. \"871-architect-output.json\" for issue 871. Verified by running test_pipeline_prompts.py.", + "files_affected": [ + "orchestrator/routes/pipelines.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-3-2", + "description": "Update _build_checker_prompt(), _build_check_and_fix_prompt(), and _build_autofix_prompt() to replace hardcoded \"implement-results.json\" with {identifier}-implement-results.json. Add identifier parameter if not already available from pipeline context. Affects lines ~4063, ~4275, and related autofix prompt code.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Checker and autofix prompts contain e.g. \"871-implement-results.json\".", + "files_affected": [ + "orchestrator/routes/pipelines.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-3-3", + "description": "Update _synthesize_plan_draft_from_outputs() (~line 4335) to construct prefixed filenames in the agent_files list. Derive identifier from the existing pipeline_mode/issue_number parameters. Fall back to old filenames if prefixed files don't exist.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Function reads from {identifier}-architect-output.json first, falls back to architect-output.json. Same for risk_analyst.", + "files_affected": [ + "orchestrator/routes/pipelines.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-3-4", + "description": "Update _read_tester_gaps() (~line 1619) to accept identifier parameter and construct prefixed path with fallback. Update both callers (~line 3139 and ~line 5173) to pass identifier from pipeline context.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Function reads from {identifier}-tester-output.json first, falls back to tester-output.json.", + "files_affected": [ + "orchestrator/routes/pipelines.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-3-5", + "description": "Update all remaining callers of load_agent_output(), save_agent_output(), and collect_handoff_data() in orchestrator/routes/pipelines.py to pass the identifier from pipeline context. This covers any callers not addressed by TASK-3-1 through TASK-3-4.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "No calls to load_agent_output, save_agent_output, or collect_handoff_data without identifier in any orchestrator code path. grep across orchestrator/ confirms all invocations pass identifier.", + "files_affected": [ + "orchestrator/routes/pipelines.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + } + ], + "dependencies": [], + "review_feedback": [] + }, + { + "id": "phase-4", + "name": "Tests", + "status": "pending", + "review_cycles": 0, + "max_cycles": 3, + "escalated": false, + "escalation_reason": null, + "tasks": [ + { + "id": "task-4-1", + "description": "Update test_pipeline_prompts.py \u2014 change all assertions that check for \"implement-results.json\" in prompt content to expect prefixed filenames. Update TestReadTesterGaps tests to create/expect prefixed filenames.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "All tests in test_pipeline_prompts.py pass with prefixed filenames.", + "files_affected": [ + "orchestrator/tests/test_pipeline_prompts.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-4-2", + "description": "Update test_health_check_tester_coverage.py and test_health_check_tier1_advanced.py \u2014 update mock file creation to use prefixed filenames where these tests create/reference architect-output.json or similar files.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Health check tests pass with prefixed filenames.", + "files_affected": [ + "orchestrator/tests/test_health_check_tester_coverage.py", + "orchestrator/tests/test_health_check_tier1_advanced.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-4-3", + "description": "Update orchestrator/tests/test_signals.py \u2014 the mock at line 86 patches save_agent_output. Update this mock and any assertions to account for the new identifier parameter in the function signature.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "test_signals.py tests pass. Mock correctly matches updated save_agent_output signature.", + "files_affected": [ + "orchestrator/tests/test_signals.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-4-4", + "description": "Update integration_tests/local_pipeline/mock-sandbox/phase-runner.sh \u2014 update RESULTS_FILE assignment (~line 143) to use prefixed filename derived from pipeline context environment variables (EGG_ISSUE_NUMBER or EGG_PIPELINE_ID).", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Integration test writes to {identifier}-implement-results.json. Local pipeline integration test passes.", + "files_affected": [ + "integration_tests/local_pipeline/mock-sandbox/phase-runner.sh" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-4-5", + "description": "Add tests for backward-compat fallback in load_agent_output(). Test cases \u2014 (1) prefixed path exists and is returned, (2) only old global path exists and fallback reads it, (3) neither exists and empty dict returned, (4) both exist and prefixed path takes priority.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "All 4 fallback test cases pass. Tests placed alongside existing orchestrator contract tests.", + "files_affected": [ + "shared/egg_contracts/tests/test_orchestrator.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + } + ], + "dependencies": [], + "review_feedback": [] + }, + { + "id": "phase-5", + "name": "Documentation and agent mode commands", + "status": "pending", + "review_cycles": 0, + "max_cycles": 3, + "escalated": false, + "escalation_reason": null, + "tasks": [ + { + "id": "task-5-1", + "description": "Update docs/guides/sdlc-pipeline.md \u2014 change references to {role}-output.json naming convention (lines ~362, ~717) to document {identifier}-{role}-output.json.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Documentation reflects new naming convention with examples.", + "files_affected": [ + "docs/guides/sdlc-pipeline.md" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-5-2", + "description": "Update docs/guides/agent-development.md \u2014 update the agent output file naming tree (lines ~215-218) to show {identifier}-prefixed filenames.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "File tree example shows prefixed filenames.", + "files_affected": [ + "docs/guides/agent-development.md" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-5-3", + "description": "Update docs/architecture/orchestrator.md \u2014 update implement-results.json reference (line ~121) to show prefixed filename.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Architecture doc reflects new naming.", + "files_affected": [ + "docs/architecture/orchestrator.md" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-5-4", + "description": "Update sandbox/.claude/commands/ agent mode files (coder-mode.md, tester-mode.md, integrator-mode.md, documenter-mode.md) \u2014 replace all hardcoded {role}-output.json paths with a note that paths are prefixed with the issue/pipeline identifier.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "All agent mode command files reference {identifier}-prefixed paths.", + "files_affected": [ + "sandbox/.claude/commands/coder-mode.md", + "sandbox/.claude/commands/tester-mode.md", + "sandbox/.claude/commands/integrator-mode.md", + "sandbox/.claude/commands/documenter-mode.md" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + }, + { + "id": "task-5-5", + "description": "Update docstring in orchestrator/health_checks/tier1/phase_output.py (line ~10) to reflect new naming convention.", + "status": "pending", + "commit": null, + "checkpoint_id": null, + "notes": "", + "acceptance_criteria": "Docstring mentions {identifier}-prefixed filenames.", + "files_affected": [ + "orchestrator/health_checks/tier1/phase_output.py" + ], + "review_cycles": 0, + "max_cycles": 3, + "escalated": false + } + ], + "dependencies": [], + "review_feedback": [] + } + ], + "decisions": [], + "workflow_owner": null, + "audit_log": [], + "refine_review_cycles": 0, + "refine_review_feedback": "", + "plan_review_cycles": 0, + "plan_review_feedback": "", + "pr": null, + "feedback": null, + "phase_configs": null, + "agent_executions": [ + { + "role": "coder", + "phase_id": null, + "status": "complete", + "started_at": "2026-02-23T08:12:52.633680Z", + "completed_at": "2026-02-23T08:29:08.592986Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + }, + { + "role": "tester", + "phase_id": null, + "status": "complete", + "started_at": "2026-02-23T08:29:08.595385Z", + "completed_at": "2026-02-23T08:36:15.806011Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + }, + { + "role": "documenter", + "phase_id": null, + "status": "complete", + "started_at": "2026-02-23T08:29:08.594891Z", + "completed_at": "2026-02-23T08:33:57.425052Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + }, + { + "role": "integrator", + "phase_id": null, + "status": "complete", + "started_at": "2026-02-23T08:36:15.807700Z", + "completed_at": "2026-02-23T08:46:01.735213Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + }, + { + "role": "reviewer_contract", + "phase_id": null, + "status": "complete", + "started_at": null, + "completed_at": "2026-02-23T08:53:37.715915Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + }, + { + "role": "reviewer_code", + "phase_id": null, + "status": "complete", + "started_at": null, + "completed_at": "2026-02-23T08:58:13.622549Z", + "commit": null, + "checkpoint_id": null, + "outputs": {}, + "error": null, + "retry_count": 0, + "conflicts": [] + } + ], + "multi_agent_config": null +} diff --git a/.egg-state/drafts/871-analysis.md b/.egg-state/drafts/871-analysis.md new file mode 100644 index 000000000..78f4c6a4b --- /dev/null +++ b/.egg-state/drafts/871-analysis.md @@ -0,0 +1,190 @@ +# Analysis: Namespace .egg-state files per-pipeline to prevent merge conflicts + +> Issue: #871 | Phase: refine + +## Problem Statement + +The `.egg-state/` directory has inconsistent file namespacing. Most subdirectories (`contracts/`, `drafts/`, `reviews/`) use per-issue or per-pipeline prefixes in filenames (e.g., `850.json`, `850-analysis.md`), so concurrent pipelines on different branches never conflict when merged. However, two subdirectories still use globally-named files: + +- `agent-outputs/`: `architect-output.json`, `integrator-output.json`, `risk_analyst-output.json` +- `checks/`: `implement-results.json` + +When multiple pipelines run concurrently and their branches merge to main, these globally-named files produce merge conflicts because they contain issue-specific data at the same file path. PR #869 demonstrated this with 4 such conflicts. + +The desired outcome is that all `.egg-state/` files use per-issue/pipeline prefixed filenames, eliminating merge conflicts entirely. + +## Current Behavior + +### Correctly namespaced directories (no conflicts) + +| Directory | Pattern | Example | Code | +|-----------|---------|---------|------| +| `contracts/` | `{identifier}.json` | `850.json` | `shared/egg_contracts/loader.py:44` — `get_contract_path()` | +| `drafts/` | `{identifier}-{phase}.md` | `850-analysis.md` | `orchestrator/routes/pipelines.py:1060` — `_get_draft_path()` | +| `reviews/` | `{identifier}-{phase}-{type}-review.json` | `850-implement-code-review.json` | `orchestrator/routes/pipelines.py:1035` — `_verdict_path_for_type()` | + +### Globally-named files (cause conflicts) + +**`agent-outputs/` handoff files** — Written by agents via prompt instructions, read by the orchestrator: + +| File | Writer | Reader | Writer code | Reader code | +|------|--------|--------|-------------|-------------| +| `architect-output.json` | Architect agent (prompt) | `_synthesize_plan_draft_from_outputs()` | `pipelines.py:2524` | `pipelines.py:4344-4345` | +| `integrator-output.json` | Integrator agent (prompt) | Downstream consumers | `pipelines.py:2509` | — | +| `risk_analyst-output.json` | Risk analyst agent (prompt) | `_synthesize_plan_draft_from_outputs()` | `pipelines.py:2590` | `pipelines.py:4346` | + +There are also additional globally-named handoff files that were not listed in the issue but follow the same pattern via `load_agent_output()` and `save_agent_output()` in `shared/egg_contracts/orchestrator.py:323-369`: +- `coder-output.json` — Written by coder, read by tester/documenter/integrator +- `tester-output.json` — Written by tester, read by `_read_tester_gaps()` at `pipelines.py:1631` and by coder revision prompts at `pipelines.py:2196`, `pipelines.py:2790` +- `documenter-output.json` — Written by documenter + +These additional files are also globally-named and would cause the same conflict pattern. + +**`checks/` results file** — Written by the checker+autofixer agent, read by autofix prompt and referenced in documentation: + +| File | Writer | Reader | Writer code | Reader code | +|------|--------|--------|-------------|-------------| +| `implement-results.json` | Checker agent (prompt) | Autofix prompt, health checks | `pipelines.py:4063` | `pipelines.py:4136` (prompt ref) | + +### How paths are currently constructed + +**Prompt-based paths** (hardcoded strings in agent prompts): +- `pipelines.py:2524` — `"Write your analysis to .egg-state/agent-outputs/architect-output.json."` +- `pipelines.py:2509` — `"Write your integration report to .egg-state/agent-outputs/integrator-output.json."` +- `pipelines.py:2590` — `"Write your risk assessment to .egg-state/agent-outputs/risk_analyst-output.json."` +- `pipelines.py:4063` — `"write results to .egg-state/checks/implement-results.json"` +- `pipelines.py:4275` — Same in `_build_check_and_fix_prompt()` + +**Programmatic paths** (Python code constructing paths): +- `shared/egg_contracts/orchestrator.py:333` — `load_agent_output()`: `repo_path / ".egg-state" / "agent-outputs" / f"{role.value}-output.json"` +- `shared/egg_contracts/orchestrator.py:364` — `save_agent_output()`: Same pattern +- `pipelines.py:4335` — `_synthesize_plan_draft_from_outputs()`: `outputs_dir / filename` where filename is `"architect-output.json"` +- `pipelines.py:1631` — `_read_tester_gaps()`: hardcoded `"tester-output.json"` + +### Gateway and file access patterns + +The gateway's file access enforcement uses directory-level patterns (e.g., `.egg-state/agent-outputs/`) rather than specific filenames: +- `gateway/agent_restrictions.py` — All agent roles have `".egg-state/agent-outputs/"` in their `allowed_write` patterns (lines 192, 243, 273, 301, 329, 373, 383, 393, 403, 449) +- `gateway/phase_filter.py:495,505` — REFINE and PLAN phases allow `".egg-state/agent-outputs/*"` +- `shared/egg_contracts/agent_roles.py` — Role definitions also use directory-level patterns + +Since these patterns are directory-level wildcards, **renaming files within the directory requires no gateway changes**. + +### Readonly mount enforcement + +`shared/egg_container/__init__.py:132` — `_IMPLEMENT_READONLY_DIRS = ("drafts", "contracts", "pipelines", "reviews")`. Neither `agent-outputs` nor `checks` are in the readonly set, so **no mount changes needed**. + +## Constraints + +- **Backward compatibility**: The issue explicitly requires that existing in-flight pipelines not break. Readers should check both old (global) and new (prefixed) paths. +- **Agent-written files**: Several of these files are written by agents responding to prompt instructions (not orchestrator code). Agents write to the exact path specified in their prompt, so prompt changes are required. +- **Programmatic and prompt paths**: Some paths are constructed in Python (`load_agent_output`, `save_agent_output`), others are hardcoded strings in prompt builders. Both need updating. +- **Issue number availability**: The issue number (or pipeline ID for local mode) must be available at every callsite. The prompt builders already receive `issue_number` and `pipeline_id` parameters (or can derive them from `pipeline_mode`). +- **Test coverage**: The test file `orchestrator/tests/test_pipeline_prompts.py` has extensive tests checking prompt content and file operations. The integration test `integration_tests/local_pipeline/mock-sandbox/phase-runner.sh` hardcodes `implement-results.json`. +- **Documentation**: `docs/guides/sdlc-pipeline.md`, `docs/guides/agent-development.md`, `docs/architecture/orchestrator.md`, `sandbox/.claude/commands/tester-mode.md`, `sandbox/.claude/commands/integrator-mode.md` all reference these filenames. + +## Options Considered + +### Option A: Prefix filenames with issue number (matching existing convention) + +**Approach**: Rename all globally-named files to use `{identifier}-{role}-output.json` and `{identifier}-implement-results.json`, matching the convention already used by `contracts/`, `drafts/`, and `reviews/`. Update `load_agent_output()` / `save_agent_output()` to accept an `identifier` parameter. Update all prompt builders to include the identifier in the path. Add backward-compatible fallback reads. + +**Pros**: +- Consistent with the existing namespacing convention used by `contracts/`, `drafts/`, `reviews/` +- Simple and predictable — the identifier is already available at all callsites +- Minimal conceptual overhead — developers already understand the `{identifier}-` prefix pattern + +**Cons**: +- Requires updating both programmatic code and prompt strings (two different code paths) +- Backward-compat fallback adds temporary code debt (can be removed after all in-flight pipelines complete) + +### Option B: Namespace by subdirectory instead of filename prefix + +**Approach**: Move files into per-issue subdirectories: `.egg-state/agent-outputs/{identifier}/architect-output.json` instead of `.egg-state/agent-outputs/{identifier}-architect-output.json`. + +**Pros**: +- Cleaner directory structure — each pipeline's files are grouped together +- Easier to list/cleanup files for a specific pipeline + +**Cons**: +- **Inconsistent with existing convention** — `contracts/`, `drafts/`, `reviews/` all use filename prefixes, not subdirectories +- More changes needed to gateway patterns (currently use `".egg-state/agent-outputs/*"`, would need `".egg-state/agent-outputs/**/*"`) +- More complex backward-compatibility: checking both directory structures +- git doesn't track empty directories, so cleanup is trickier + +### Option C: Use a centralized path builder utility + +**Approach**: Create a single path-building module (e.g., `shared/egg_contracts/paths.py`) that all code uses to resolve `.egg-state/` paths. This would centralize the naming convention and make future changes easier. + +**Pros**: +- Single source of truth for all `.egg-state/` path construction +- Makes it easy to change naming conventions in the future +- Reduces risk of inconsistencies + +**Cons**: +- Over-engineering for this specific fix — the immediate problem is just adding a prefix to 4-6 files +- Still needs prompt string updates (agents write based on prompt instructions, not Python utilities) +- `_get_draft_path()` and `_verdict_path_for_type()` already serve this role for their respective directories + +## Recommended Approach + +**Option A: Prefix filenames with issue number** is recommended. It directly matches the established convention used by `contracts/`, `drafts/`, and `reviews/`, making the codebase consistent. The scope is well-bounded (update `load_agent_output` / `save_agent_output`, update prompt builders, add fallback reads, update tests and docs). + +The implementation touches two categories of code: + +1. **Programmatic paths** (`load_agent_output`, `save_agent_output`, `_synthesize_plan_draft_from_outputs`, `_read_tester_gaps`): Add `identifier` parameter, construct `f"{identifier}-{role.value}-output.json"`. Fallback to old path if new doesn't exist. + +2. **Prompt-embedded paths** (architect, integrator, risk_analyst, checker, check-and-fix prompts): Replace hardcoded filenames with dynamically constructed paths using the issue number / pipeline ID. These prompt builders already receive `pipeline_id` and `pipeline_mode` parameters. + +The backward-compatible fallback in readers is straightforward: try the new prefixed path first, fall back to the old global path. This ensures in-flight pipelines that already wrote to the old paths continue to work. + +## Open Questions + +### Scope of files to namespace + +The issue explicitly lists 4 files: `architect-output.json`, `integrator-output.json`, `risk_analyst-output.json`, and `implement-results.json`. However, the `load_agent_output()` / `save_agent_output()` functions use the same global naming pattern for ALL agent roles (`coder-output.json`, `tester-output.json`, `documenter-output.json`, `task_planner-output.json`, etc.). Should ALL `{role}-output.json` files be namespaced, or only the three listed in the issue? + +**Recommendation**: Namespace all of them. If we only fix the three listed, the remaining files (`coder-output.json`, `tester-output.json`, `documenter-output.json`) will cause the same merge conflicts when pipelines use those roles concurrently. + +### Identifier format for local-mode pipelines + +For issue-mode pipelines, the identifier is the issue number (e.g., `871`). For local-mode pipelines, it's the pipeline ID (e.g., `local-a1b2c3d4`). The existing namespaced directories handle both modes (see `_get_draft_path()` and `_verdict_path_for_type()`). Should we follow the same dual-mode pattern for agent outputs? + +**Recommendation**: Yes, follow the existing pattern exactly. `load_agent_output` and `save_agent_output` would accept an `identifier: int | str` parameter, matching how `get_contract_path()` works. + +### Cleanup of old globally-named files + +After this change ships, old globally-named files (`architect-output.json`, etc.) will remain in `.egg-state/agent-outputs/` on branches that were created before the change. Should we: +1. Add a one-time cleanup migration? +2. Let them age out naturally as branches merge? +3. Add a `.gitignore` entry for the old filenames? + +### Health check references + +`orchestrator/health_checks/tier1/phase_output.py:10` references `architect-output.json` in a docstring. The health check code may also look for this file at the old path. Should health checks be updated to use the new naming, or are they purely informational? + +--- + +*Authored-by: egg* + + +## Decision: Scope of files to namespace + +Which files should be namespaced with issue/pipeline prefix? + +- [ ] Only the 4 files listed in the issue (architect-output.json, integrator-output.json, risk_analyst-output.json, implement-results.json) +- [ ] All agent role output files ({role}-output.json) plus implement-results.json +- [ ] Other (explain in reply) + + +## Decision: Cleanup strategy for old globally-named files + +How should old globally-named files be handled after the change ships? + +- [ ] Let them age out naturally (backward-compat fallback handles reads) +- [ ] Add a one-time cleanup script that renames existing files on active branches +- [ ] Add .gitignore entries for old filenames to prevent accidental creation +- [ ] Other (explain in reply) + +# metadata +complexity_tier: mid diff --git a/.egg-state/drafts/871-plan.md b/.egg-state/drafts/871-plan.md new file mode 100644 index 000000000..db9dfb9d1 --- /dev/null +++ b/.egg-state/drafts/871-plan.md @@ -0,0 +1,397 @@ +# Plan: Namespace .egg-state files per-pipeline to prevent merge conflicts + +> Issue: #871 | Phase: plan | Revision: 2 (addresses plan reviewer feedback) + +## Summary + +Add issue/pipeline identifier prefix to all globally-named files in +`.egg-state/agent-outputs/` and `.egg-state/checks/`, matching the convention +already used by `contracts/`, `drafts/`, and `reviews/`. This eliminates merge +conflicts when concurrent pipelines merge to main (e.g., PR #869 had 4 such +conflicts). + +## Approach + +The implementation follows Option A from the architect analysis: prefix +filenames with the issue/pipeline identifier, matching the `{identifier}-` +convention established by `contracts/`, `drafts/`, and `reviews/`. + +### Scope + +All agent output files are namespaced, not just the 4 listed in the issue. +The `load_agent_output()` / `save_agent_output()` functions use the same +global pattern for ALL agent roles, so fixing only the 3 handoff files leaves +`coder-output.json`, `tester-output.json`, `documenter-output.json`, and +`task_planner-output.json` vulnerable to the same conflict. Since we modify +the shared functions, all roles benefit at zero additional cost. + +Files affected: +- `agent-outputs/`: `{role}-output.json` -> `{identifier}-{role}-output.json` +- `checks/`: `implement-results.json` -> `{identifier}-implement-results.json` + +### Path construction changes + +Three categories of code construct these paths: + +1. **Shared library** (`shared/egg_contracts/orchestrator.py`) — `load_agent_output()`, + `save_agent_output()`, `collect_handoff_data()`. These are the foundational + functions that get an `identifier` parameter added. + +2. **Orchestrator wrapper layer** — `orchestrator/handoffs.py` wraps the shared + functions with `save_agent_output()`, `load_agent_output_data()`, + `collect_handoff_data()`, and `get_handoff_env_var()`. These wrappers must + accept and forward the `identifier` parameter. `orchestrator/dispatch.py` + imports `save_agent_output` and `collect_handoff_data` directly from shared + and calls them at lines 219 and 264. `orchestrator/routes/signals.py` imports + `save_agent_output` from `handoffs` and calls it at line 193. All three files + must be updated to pass `identifier` from pipeline context. + +3. **Prompt-embedded paths** — Hardcoded strings in prompt builders for architect, + integrator, risk_analyst, checker, and check-and-fix agents in `pipelines.py`. + These are replaced with dynamically constructed paths using the issue number + or pipeline ID already available as parameters. + +### Backward compatibility + +Readers use a fallback strategy: try the new prefixed path first, fall back +to the old global path. This ensures in-flight pipelines that already wrote +to old paths continue to work. Writers always write to the new prefixed path. +The fallback can be removed after all in-flight pipelines complete. + +### What does NOT change + +- **Gateway phase filter** (`gateway/phase_filter.py`) — uses directory-level + wildcards (`.egg-state/agent-outputs/*`), so file renames are transparent. +- **Gateway agent restrictions** (`gateway/agent_restrictions.py`) — same + directory-level patterns. +- **Readonly mount enforcement** (`shared/egg_container/__init__.py`) — + `agent-outputs/` and `checks/` are not in `_IMPLEMENT_READONLY_DIRS`. +- **Phase permissions config** (`.egg/phase-permissions.json`) — directory-level. + +## Phases + +### Phase 1: Core path construction (shared library) + +Update the shared utility functions that build agent output file paths. This +is the foundation all other changes depend on. + +The `load_agent_output()` and `save_agent_output()` functions in +`shared/egg_contracts/orchestrator.py` get an optional `identifier` parameter. +When provided, paths use `{identifier}-{role.value}-output.json`. When `None` +(default), the old `{role.value}-output.json` path is used for backward +compatibility. `load_agent_output()` additionally falls back to the old path +when the new path doesn't exist. + +`collect_handoff_data()` similarly gets an `identifier` parameter that it +passes through to `load_agent_output()`. + +### Phase 2: Orchestrator wrapper and caller updates + +Update all orchestrator-level code that calls the shared functions. This covers +three files beyond `pipelines.py` that the risk analyst flagged as HIGH risk: + +**`orchestrator/handoffs.py`** — The wrapper layer. Functions `save_agent_output()` +(line 152), `load_agent_output_data()` (line 171), `collect_handoff_data()` +(line 193), and `get_handoff_env_var()` (line 227) all call shared library +functions without an `identifier` parameter. Each wrapper must accept an optional +`identifier` and forward it to the underlying shared function. + +**`orchestrator/dispatch.py`** — Imports `save_agent_output` and +`collect_handoff_data` directly from `egg_contracts.orchestrator` (lines 29-35). +`PipelineDispatcher.complete_agent()` calls `save_agent_output()` at line 219 +and `get_handoff_data()` calls `collect_handoff_data()` at line 264. The +`PipelineDispatcher` already has `self.pipeline` which provides the pipeline +identifier via `contract_key` (line 118). Both calls must pass this identifier. + +**`orchestrator/routes/signals.py`** — Imports `save_agent_output` from the +handoffs wrapper (line 36). Calls it at line 193 when handling agent completion +signals. The `pipeline_id` is available from the URL parameter and the pipeline +object provides the issue number. Must pass identifier to the wrapper. + +### Phase 3: Prompt builder and reader updates (orchestrator/routes/pipelines.py) + +Update all prompt builders and reader functions in `orchestrator/routes/pipelines.py`: + +**Prompt builders** (write paths embedded in agent instructions): +- `_build_agent_prompt()` for architect, integrator, risk_analyst — replace + hardcoded `{role}-output.json` with `{identifier}-{role}-output.json` +- `_build_checker_prompt()` — replace `implement-results.json` with + `{identifier}-implement-results.json` +- `_build_check_and_fix_prompt()` — same change +- `_build_autofix_prompt()` — same change for any implement-results.json refs + +**Reader functions**: +- `_synthesize_plan_draft_from_outputs()` — use prefixed filenames with + fallback to old names +- `_read_tester_gaps()` — use prefixed path with fallback; update both + callers (~line 3139 and ~line 5173) to pass the identifier + +**Callers**: Update all remaining callers of `load_agent_output`, +`save_agent_output`, and `collect_handoff_data` in `pipelines.py` to pass the +identifier from their pipeline context (`issue_number` for issue mode, +`pipeline_id` for local mode). + +### Phase 4: Tests + +Update existing tests and add new ones: +- `orchestrator/tests/test_pipeline_prompts.py` — update assertions that check + for `implement-results.json` in prompt content; update `TestReadTesterGaps` + tests to use prefixed paths +- `orchestrator/tests/test_health_check_tester_coverage.py` — update mock file + creation to use prefixed filenames +- `orchestrator/tests/test_health_check_tier1_advanced.py` — same +- `orchestrator/tests/test_signals.py` — update the `save_agent_output` mock + at line 86 to match the updated function signature (new `identifier` parameter) +- `integration_tests/local_pipeline/mock-sandbox/phase-runner.sh` — update + `RESULTS_FILE` to use prefixed filename from pipeline context variables +- Add new tests for backward-compat fallback in `load_agent_output()`: + new path preferred, falls back to old, returns empty dict when neither exists + +### Phase 5: Documentation and agent mode commands + +Update all documentation and sandbox agent mode commands that reference old +globally-named filenames: +- `docs/guides/sdlc-pipeline.md` — update `{role}-output.json` convention refs +- `docs/guides/agent-development.md` — update agent output file naming tree +- `docs/architecture/orchestrator.md` — update `implement-results.json` ref +- `sandbox/.claude/commands/coder-mode.md` — update coder-output.json path +- `sandbox/.claude/commands/tester-mode.md` — update coder/tester output paths +- `sandbox/.claude/commands/integrator-mode.md` — update all output file refs +- `sandbox/.claude/commands/documenter-mode.md` — update coder/documenter refs +- `orchestrator/health_checks/tier1/phase_output.py` — update docstring (line 10) + +## Test Strategy + +**Existing test updates**: All tests referencing old filenames must be updated +to expect prefixed filenames. Key test files: +- `test_pipeline_prompts.py` — prompt content assertions, tester gap tests +- `test_health_check_tester_coverage.py` — mock file creation +- `test_health_check_tier1_advanced.py` — mock file creation +- `test_signals.py` — `save_agent_output` mock (line 86) needs updated signature +- `phase-runner.sh` integration test + +**New tests**: Add tests for the backward-compat fallback behavior in +`load_agent_output()`: +1. When prefixed path exists, it is used +2. When only old global path exists, fallback reads it +3. When neither exists, returns empty dict +4. When both exist, prefixed path takes priority + +**Gateway tests**: `tests/gateway/test_agent_restrictions.py` and +`gateway/tests/test_phase_filter.py` use directory-level wildcard patterns, +not specific filenames. These tests pass with any filename within the +`agent-outputs/` directory, so no changes are needed. + +## Risks + +### R-1 (HIGH): Wrapper layer callsites receive identifier=None + +**Likelihood: HIGH | Impact: HIGH** + +Three orchestrator files (`handoffs.py`, `dispatch.py`, `signals.py`) wrap or +directly call `save_agent_output`, `load_agent_output`, and `collect_handoff_data` +from the shared library. If these callers are not updated to pass `identifier`, +the shared functions receive `identifier=None` and fall back to global paths — +silently defeating the entire fix. Phase 2 addresses this with dedicated tasks +for each file. + +### R-2 (MEDIUM): In-flight pipeline breakage + +**Likelihood: LOW | Impact: HIGH** + +Pipelines already running when this change deploys may have written output files +to old global paths. If readers only check the new prefixed paths, those files +are invisible. Mitigated by backward-compat fallback in `load_agent_output()`: +try prefixed path first, fall back to global path. + +### R-3 (MEDIUM): Identifier not available in some code paths + +**Likelihood: LOW | Impact: MEDIUM** + +The `identifier` parameter defaults to `None` in all shared functions, preserving +old behavior for any callsite that can't provide it. All active pipeline code +paths have `issue_number` or `pipeline_id` available. The `PipelineDispatcher` +already exposes `contract_key` (line 118 of `dispatch.py`) that returns the +appropriate identifier for both issue-mode and local-mode pipelines. + +### R-4 (LOW): Agent mode commands reference old paths + +**Likelihood: LOW | Impact: LOW** + +Sandbox agent mode command files reference old filenames. Pipeline-spawned agents +receive correct paths from `_build_agent_prompt()`, so mode commands are secondary. +Phase 5 updates these for consistency. + +## Files Modified + +| File | Phase | Change | +|------|-------|--------| +| `shared/egg_contracts/orchestrator.py` | 1 | Add `identifier` param to `load_agent_output`, `save_agent_output`, `collect_handoff_data`; add fallback logic | +| `orchestrator/handoffs.py` | 2 | Add `identifier` param to `save_agent_output`, `load_agent_output_data`, `collect_handoff_data`, `get_handoff_env_var` wrappers; forward to shared functions | +| `orchestrator/dispatch.py` | 2 | Update `PipelineDispatcher.complete_agent()` (line 219) and `get_handoff_data()` (line 264) to pass `self.contract_key` as identifier | +| `orchestrator/routes/signals.py` | 2 | Update `handle_complete_signal()` (line 193) to pass pipeline identifier to `save_agent_output` | +| `orchestrator/routes/pipelines.py` | 3 | Update prompt builders, reader functions, and all remaining callers to use prefixed paths | +| `orchestrator/tests/test_pipeline_prompts.py` | 4 | Update prompt assertions and tester gap tests | +| `orchestrator/tests/test_health_check_tester_coverage.py` | 4 | Update mock file creation | +| `orchestrator/tests/test_health_check_tier1_advanced.py` | 4 | Update mock file creation | +| `orchestrator/tests/test_signals.py` | 4 | Update `save_agent_output` mock to match new signature | +| `integration_tests/local_pipeline/mock-sandbox/phase-runner.sh` | 4 | Update `RESULTS_FILE` to prefixed path | +| `shared/egg_contracts/tests/test_orchestrator.py` | 4 | Add backward-compat fallback tests | +| `docs/guides/sdlc-pipeline.md` | 5 | Update naming convention references | +| `docs/guides/agent-development.md` | 5 | Update agent output file naming tree | +| `docs/architecture/orchestrator.md` | 5 | Update `implement-results.json` reference | +| `sandbox/.claude/commands/coder-mode.md` | 5 | Update output file path | +| `sandbox/.claude/commands/tester-mode.md` | 5 | Update output file paths | +| `sandbox/.claude/commands/integrator-mode.md` | 5 | Update output file paths | +| `sandbox/.claude/commands/documenter-mode.md` | 5 | Update output file paths | +| `orchestrator/health_checks/tier1/phase_output.py` | 5 | Update docstring | + +--- + +```yaml +# yaml-tasks +pr: + title: "Namespace .egg-state files per-pipeline to prevent merge conflicts" + description: | + Add issue/pipeline identifier prefix to all globally-named files in + .egg-state/agent-outputs/ and .egg-state/checks/, matching the convention + already used by contracts/, drafts/, and reviews/. This eliminates merge + conflicts when concurrent pipelines merge to main. Includes backward-compat + fallback so in-flight pipelines continue working. Updates the full caller + chain: shared library, orchestrator wrappers (handoffs.py, dispatch.py, + signals.py), prompt builders in pipelines.py, tests, and documentation. +phases: + - id: 1 + name: Core path construction + goal: Update shared utility functions to support identifier-prefixed agent output paths + tasks: + - id: TASK-1-1 + description: Add optional identifier parameter (int | str | None) to load_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, construct path as {identifier}-{role.value}-output.json. Add fallback — if prefixed path does not exist, try old {role.value}-output.json path. When identifier is None, use old path directly (backward compat). + acceptance: load_agent_output(repo, role, identifier=871) reads from 871-{role}-output.json, falling back to {role}-output.json. load_agent_output(repo, role) reads from {role}-output.json (unchanged behavior). + files: + - shared/egg_contracts/orchestrator.py + - id: TASK-1-2 + description: Add optional identifier parameter to save_agent_output() in shared/egg_contracts/orchestrator.py. When identifier is provided, write to {identifier}-{role.value}-output.json. When None, write to {role.value}-output.json (backward compat). + acceptance: save_agent_output(repo, role, data, identifier=871) writes to 871-{role}-output.json. save_agent_output(repo, role, data) writes to {role}-output.json. + files: + - shared/egg_contracts/orchestrator.py + - id: TASK-1-3 + description: Add optional identifier parameter to collect_handoff_data() in shared/egg_contracts/orchestrator.py and pass it through to load_agent_output() for each dependency. + acceptance: collect_handoff_data(repo, role, identifier=871) reads dependency outputs from prefixed paths with fallback. + files: + - shared/egg_contracts/orchestrator.py + - id: 2 + name: Orchestrator wrapper and caller updates + goal: Update all orchestrator-level wrappers and callers of the shared functions to pass the pipeline identifier + tasks: + - id: TASK-2-1 + description: Add optional identifier parameter to all wrapper functions in orchestrator/handoffs.py — save_agent_output() (line 152), load_agent_output_data() (line 171), collect_handoff_data() (line 193), and get_handoff_env_var() (line 227). Each wrapper must accept identifier and forward it to the underlying shared library function. + acceptance: All four wrapper functions accept an optional identifier parameter and pass it through. Calling save_agent_output(repo, output, identifier=871) results in the shared save_agent_output receiving identifier=871. + files: + - orchestrator/handoffs.py + - id: TASK-2-2 + description: Update orchestrator/dispatch.py — PipelineDispatcher.complete_agent() (line 219) must pass self.contract_key as identifier to save_agent_output(). PipelineDispatcher.get_handoff_data() (line 264) must pass self.contract_key as identifier to collect_handoff_data(). The contract_key property (line 118) already returns issue_number for issue-mode or pipeline_id for local-mode. + acceptance: PipelineDispatcher.complete_agent() calls save_agent_output(self.repo_path, contract_role, outputs, identifier=self.contract_key). PipelineDispatcher.get_handoff_data() calls collect_handoff_data(self.repo_path, contract_role, identifier=self.contract_key). Verified by grep showing no calls without identifier. + files: + - orchestrator/dispatch.py + - id: TASK-2-3 + description: Update orchestrator/routes/signals.py — handle_complete_signal() (line 193) must pass the pipeline identifier to save_agent_output(). Derive identifier from the pipeline object (pipeline.issue_number for issue-mode, pipeline_id for local-mode). + acceptance: save_agent_output at line 193 receives identifier. Verified by grep showing no calls without identifier. + files: + - orchestrator/routes/signals.py + - id: 3 + name: Prompt builder and reader updates + goal: Update all prompt builders and reader functions in pipelines.py to use identifier-prefixed paths + tasks: + - id: TASK-3-1 + description: Update _build_agent_prompt() in orchestrator/routes/pipelines.py for architect, integrator, and risk_analyst roles. Replace hardcoded output filenames (e.g., "architect-output.json") with dynamically constructed {identifier}-prefixed paths using the issue_number/pipeline_id already available in the prompt builder context. Affects lines ~2509, ~2524, ~2590. + acceptance: Prompts contain e.g. "871-architect-output.json" for issue 871. Verified by running test_pipeline_prompts.py. + files: + - orchestrator/routes/pipelines.py + - id: TASK-3-2 + description: Update _build_checker_prompt(), _build_check_and_fix_prompt(), and _build_autofix_prompt() to replace hardcoded "implement-results.json" with {identifier}-implement-results.json. Add identifier parameter if not already available from pipeline context. Affects lines ~4063, ~4275, and related autofix prompt code. + acceptance: Checker and autofix prompts contain e.g. "871-implement-results.json". + files: + - orchestrator/routes/pipelines.py + - id: TASK-3-3 + description: Update _synthesize_plan_draft_from_outputs() (~line 4335) to construct prefixed filenames in the agent_files list. Derive identifier from the existing pipeline_mode/issue_number parameters. Fall back to old filenames if prefixed files don't exist. + acceptance: Function reads from {identifier}-architect-output.json first, falls back to architect-output.json. Same for risk_analyst. + files: + - orchestrator/routes/pipelines.py + - id: TASK-3-4 + description: Update _read_tester_gaps() (~line 1619) to accept identifier parameter and construct prefixed path with fallback. Update both callers (~line 3139 and ~line 5173) to pass identifier from pipeline context. + acceptance: Function reads from {identifier}-tester-output.json first, falls back to tester-output.json. + files: + - orchestrator/routes/pipelines.py + - id: TASK-3-5 + description: Update all remaining callers of load_agent_output(), save_agent_output(), and collect_handoff_data() in orchestrator/routes/pipelines.py to pass the identifier from pipeline context. This covers any callers not addressed by TASK-3-1 through TASK-3-4. + acceptance: No calls to load_agent_output, save_agent_output, or collect_handoff_data without identifier in any orchestrator code path. grep across orchestrator/ confirms all invocations pass identifier. + files: + - orchestrator/routes/pipelines.py + - id: 4 + name: Tests + goal: Update all existing tests and add backward-compat fallback tests + tasks: + - id: TASK-4-1 + description: Update test_pipeline_prompts.py — change all assertions that check for "implement-results.json" in prompt content to expect prefixed filenames. Update TestReadTesterGaps tests to create/expect prefixed filenames. + acceptance: All tests in test_pipeline_prompts.py pass with prefixed filenames. + files: + - orchestrator/tests/test_pipeline_prompts.py + - id: TASK-4-2 + description: Update test_health_check_tester_coverage.py and test_health_check_tier1_advanced.py — update mock file creation to use prefixed filenames where these tests create/reference architect-output.json or similar files. + acceptance: Health check tests pass with prefixed filenames. + files: + - orchestrator/tests/test_health_check_tester_coverage.py + - orchestrator/tests/test_health_check_tier1_advanced.py + - id: TASK-4-3 + description: Update orchestrator/tests/test_signals.py — the mock at line 86 patches save_agent_output. Update this mock and any assertions to account for the new identifier parameter in the function signature. + acceptance: test_signals.py tests pass. Mock correctly matches updated save_agent_output signature. + files: + - orchestrator/tests/test_signals.py + - id: TASK-4-4 + description: Update integration_tests/local_pipeline/mock-sandbox/phase-runner.sh — update RESULTS_FILE assignment (~line 143) to use prefixed filename derived from pipeline context environment variables (EGG_ISSUE_NUMBER or EGG_PIPELINE_ID). + acceptance: Integration test writes to {identifier}-implement-results.json. Local pipeline integration test passes. + files: + - integration_tests/local_pipeline/mock-sandbox/phase-runner.sh + - id: TASK-4-5 + description: Add tests for backward-compat fallback in load_agent_output(). Test cases — (1) prefixed path exists and is returned, (2) only old global path exists and fallback reads it, (3) neither exists and empty dict returned, (4) both exist and prefixed path takes priority. + acceptance: All 4 fallback test cases pass. Tests placed alongside existing orchestrator contract tests. + files: + - shared/egg_contracts/tests/test_orchestrator.py + - id: 5 + name: Documentation and agent mode commands + goal: Update all documentation and sandbox commands referencing old globally-named filenames + tasks: + - id: TASK-5-1 + description: Update docs/guides/sdlc-pipeline.md — change references to {role}-output.json naming convention (lines ~362, ~717) to document {identifier}-{role}-output.json. + acceptance: Documentation reflects new naming convention with examples. + files: + - docs/guides/sdlc-pipeline.md + - id: TASK-5-2 + description: Update docs/guides/agent-development.md — update the agent output file naming tree (lines ~215-218) to show {identifier}-prefixed filenames. + acceptance: File tree example shows prefixed filenames. + files: + - docs/guides/agent-development.md + - id: TASK-5-3 + description: Update docs/architecture/orchestrator.md — update implement-results.json reference (line ~121) to show prefixed filename. + acceptance: Architecture doc reflects new naming. + files: + - docs/architecture/orchestrator.md + - id: TASK-5-4 + description: Update sandbox/.claude/commands/ agent mode files (coder-mode.md, tester-mode.md, integrator-mode.md, documenter-mode.md) — replace all hardcoded {role}-output.json paths with a note that paths are prefixed with the issue/pipeline identifier. + acceptance: All agent mode command files reference {identifier}-prefixed paths. + files: + - sandbox/.claude/commands/coder-mode.md + - sandbox/.claude/commands/tester-mode.md + - sandbox/.claude/commands/integrator-mode.md + - sandbox/.claude/commands/documenter-mode.md + - id: TASK-5-5 + description: Update docstring in orchestrator/health_checks/tier1/phase_output.py (line ~10) to reflect new naming convention. + acceptance: Docstring mentions {identifier}-prefixed filenames. + files: + - orchestrator/health_checks/tier1/phase_output.py +``` + +--- + +*Authored-by: egg* diff --git a/.egg-state/reviews/871-implement-code-review.json b/.egg-state/reviews/871-implement-code-review.json new file mode 100644 index 000000000..736cd297a --- /dev/null +++ b/.egg-state/reviews/871-implement-code-review.json @@ -0,0 +1,7 @@ +{ + "reviewer": "code", + "verdict": "approved", + "summary": "Namespacing .egg-state files per-pipeline is correctly implemented across all layers (shared contracts, orchestrator handoffs, dispatch, signals, prompt builders, agent mode commands, docs). All 6692 tests pass with 0 failures. Backward-compatible fallback from prefixed to global paths is consistent throughout. Minor: entrypoint.py symlink creation doesn't handle pre-existing regular files (low risk), and the CLAUDE.md location change is scope creep but works correctly.", + "feedback": "", + "timestamp": "2026-02-23T09:15:00Z" +} \ No newline at end of file diff --git a/.egg-state/reviews/871-implement-contract-review.json b/.egg-state/reviews/871-implement-contract-review.json new file mode 100644 index 000000000..0e2b2768a --- /dev/null +++ b/.egg-state/reviews/871-implement-contract-review.json @@ -0,0 +1,7 @@ +{ + "reviewer": "contract", + "verdict": "approved", + "summary": "All 20 tasks across 5 phases are implemented and verified. Core namespacing functions (load/save/collect) have correct identifier parameter with backward-compat fallback. All orchestrator wrappers and callers pass identifier. Prompt builders use namespaced filenames. Tests cover all 4 fallback scenarios plus extensive edge cases. Documentation and agent mode commands updated. 6692 tests pass, lint clean.", + "feedback": "", + "timestamp": "2026-02-23T08:50:00Z" +} diff --git a/.egg-state/reviews/871-plan-plan-review.json b/.egg-state/reviews/871-plan-plan-review.json new file mode 100644 index 000000000..f19ba0f92 --- /dev/null +++ b/.egg-state/reviews/871-plan-plan-review.json @@ -0,0 +1,7 @@ +{ + "reviewer": "plan", + "verdict": "approved", + "summary": "All prior review feedback fully addressed. The revised plan adds Phase 2 for the wrapper layer callsites (handoffs.py, dispatch.py, signals.py), includes test_signals.py in Phase 4, restructures the old TASK-2-5 to eliminate scope ambiguity, and incorporates the risk analyst's R-1 finding as a HIGH-rated risk with concrete mitigation. Task breakdown is discrete and well-scoped across 18 tasks in 5 phases. Acceptance criteria are specific and testable with edge case coverage. Dependency ordering is correct (shared lib → wrappers → pipelines.py → tests → docs). Line number references verified against source code — all accurate.", + "feedback": "", + "timestamp": "2026-02-23T12:45:00Z" +} diff --git a/.egg-state/reviews/871-refine-agent-design-review.json b/.egg-state/reviews/871-refine-agent-design-review.json new file mode 100644 index 000000000..b300af0c4 --- /dev/null +++ b/.egg-state/reviews/871-refine-agent-design-review.json @@ -0,0 +1,7 @@ +{ + "reviewer": "agent-design", + "verdict": "approved", + "summary": "No agent-mode design anti-patterns found. The analysis addresses a file-naming consistency issue and recommends aligning globally-named handoff files with the existing per-issue prefix convention. The structured JSON output files are legitimate machine-to-machine handoffs between agents and the orchestrator. Prompt path updates are minimal and necessary.", + "feedback": "", + "timestamp": "2026-02-23T00:00:00Z" +} diff --git a/.egg-state/reviews/871-refine-refine-review.json b/.egg-state/reviews/871-refine-refine-review.json new file mode 100644 index 000000000..13054109f --- /dev/null +++ b/.egg-state/reviews/871-refine-refine-review.json @@ -0,0 +1,7 @@ +{ + "reviewer": "refine", + "verdict": "approved", + "summary": "Thorough, well-researched analysis with precise code references, clear options analysis, and a sound recommendation that aligns with existing codebase conventions.", + "feedback": "", + "timestamp": "2026-02-23T12:00:00Z" +} diff --git a/docs/architecture/orchestrator.md b/docs/architecture/orchestrator.md index 0f64d8252..2d922fde6 100644 --- a/docs/architecture/orchestrator.md +++ b/docs/architecture/orchestrator.md @@ -118,7 +118,8 @@ The orchestrator reads pipeline artifacts (verdict files, draft documents, check - `.egg-state/drafts/{identifier}-analysis.md` — Draft for `refine` phase (special-cased to `analysis`) - `.egg-state/drafts/{identifier}-{phase}.md` — Draft for other phases (e.g., `plan`). No draft for `implement` phase. - `.egg-state/reviews/{identifier}-{phase}-{reviewer_type}-review.json` — Review verdict files -- `.egg-state/checks/implement-results.json` — Check results from the `implement` phase +- `.egg-state/agent-outputs/{identifier}-{role}-output.json` — Agent handoff data (e.g., `871-coder-output.json`). Falls back to `{role}-output.json` for backward compatibility. +- `.egg-state/checks/{identifier}-implement-results.json` — Check results from the `implement` phase (e.g., `871-implement-results.json`) **Volume mounts:** - Orchestrator: Bind mount from `${HOST_HOME}/.egg-worktrees` to `/home/egg/.egg-worktrees` (read container-written artifacts) diff --git a/docs/guides/agent-development.md b/docs/guides/agent-development.md index 844784ac7..6dbb074a6 100644 --- a/docs/guides/agent-development.md +++ b/docs/guides/agent-development.md @@ -208,16 +208,23 @@ Pattern evaluation order: ## Handoff Data -Agents communicate via JSON files in `.egg-state/agent-outputs/`: +Agents communicate via JSON files in `.egg-state/agent-outputs/`, namespaced +by the issue number or pipeline ID to prevent merge conflicts: ``` .egg-state/agent-outputs/ -├── coder-output.json -├── tester-output.json -├── documenter-output.json -└── integrator-output.json +├── {identifier}-coder-output.json +├── {identifier}-tester-output.json +├── {identifier}-documenter-output.json +└── {identifier}-integrator-output.json ``` +For example, issue #871 produces `871-coder-output.json`, `871-tester-output.json`, etc. + +**Backward compatibility:** `load_agent_output()` checks the namespaced path first, then +falls back to the old `{role}-output.json` path. This ensures in-flight pipelines created +before the namespacing change continue to work. + Standard handoff format: ```json diff --git a/docs/guides/sdlc-pipeline.md b/docs/guides/sdlc-pipeline.md index 47bba71d2..df3c9986d 100644 --- a/docs/guides/sdlc-pipeline.md +++ b/docs/guides/sdlc-pipeline.md @@ -359,7 +359,7 @@ The implement phase supports multi-agent orchestration, where specialized agents The gateway enforces file access patterns for each agent role via `gateway/agent_restrictions.py`. For example, the Coder agent cannot modify documentation files, and the Tester agent cannot modify source code. This prevents agents from overstepping their responsibilities. Access patterns are tier-aware: `check_agent_file_access()` and `validate_agent_push()` accept an optional `complexity_tier` parameter. **Handoff Data:** -Agents communicate via handoff data stored in `.egg-state/agent-outputs/{role}-output.json`. For example, the Coder agent outputs a list of changed files, which the Tester and Documenter agents read to focus their work. +Agents communicate via handoff data stored in `.egg-state/agent-outputs/{identifier}-{role}-output.json` (where `{identifier}` is the issue number or pipeline ID). For example, the Coder agent outputs a list of changed files, which the Tester and Documenter agents read to focus their work. The identifier prefix prevents merge conflicts when concurrent pipelines merge to main. **Orchestration:** Multi-agent orchestration is managed by the local orchestrator (`orchestrator/container_spawner.py`). The orchestrator reads the contract state, determines which agents can run based on dependencies, and dispatches them in parallel where possible. @@ -716,7 +716,7 @@ To disable multi-agent mode and use single-agent execution: ### Agent Handoffs -Each agent produces handoff data stored in `.egg-state/agent-outputs/{role}-output.json`: +Each agent produces handoff data stored in `.egg-state/agent-outputs/{identifier}-{role}-output.json` (e.g., `871-coder-output.json` for issue #871): ```json { diff --git a/integration_tests/local_pipeline/mock-sandbox/phase-runner.sh b/integration_tests/local_pipeline/mock-sandbox/phase-runner.sh index 4050349d4..f2daa0499 100644 --- a/integration_tests/local_pipeline/mock-sandbox/phase-runner.sh +++ b/integration_tests/local_pipeline/mock-sandbox/phase-runner.sh @@ -140,7 +140,15 @@ if [ "$EGG_AGENT_ROLE" = "checker" ]; then ALL_PASSED="true" fi - RESULTS_FILE="$CHECKS_DIR/implement-results.json" + # Derive pipeline identifier for namespaced results filename + if [ -n "$EGG_ISSUE_NUMBER" ]; then + _IDENT="$EGG_ISSUE_NUMBER" + elif [ -n "$EGG_PIPELINE_ID" ]; then + _IDENT="$EGG_PIPELINE_ID" + else + _IDENT="unknown" + fi + RESULTS_FILE="$CHECKS_DIR/${_IDENT}-implement-results.json" if [ "$ALL_PASSED" = "true" ]; then cat > "$RESULTS_FILE" < dict[str, Any]: contract_role = map_agent_role_to_contract_role(role) if contract_role is None: return {} - return collect_handoff_data(self.repo_path, contract_role) + return collect_handoff_data(self.repo_path, contract_role, identifier=self.contract_key) def save_contract(self) -> None: """Save updated contract to disk. diff --git a/orchestrator/handoffs.py b/orchestrator/handoffs.py index 88f37d240..bc17def42 100644 --- a/orchestrator/handoffs.py +++ b/orchestrator/handoffs.py @@ -152,12 +152,14 @@ def from_dict(cls, d: dict[str, Any]) -> "AgentOutput": def save_agent_output( repo_path: Path, output: AgentOutput, + identifier: int | str | None = None, ) -> Path: """Save agent output to disk. Args: repo_path: Path to repository output: Agent output to save + identifier: Pipeline/issue identifier for namespaced paths. Returns: Path to saved file @@ -165,24 +167,26 @@ def save_agent_output( contract_role = ROLE_MAP[output.role] output_data = output.to_dict() - return contract_save_output(repo_path, contract_role, output_data) + return contract_save_output(repo_path, contract_role, output_data, identifier=identifier) def load_agent_output_data( repo_path: Path, role: AgentRole, + identifier: int | str | None = None, ) -> AgentOutput | None: """Load agent output from disk. Args: repo_path: Path to repository role: Agent role + identifier: Pipeline/issue identifier for namespaced paths. Returns: AgentOutput or None if not found """ contract_role = ROLE_MAP[role] - data = load_agent_output(repo_path, contract_role) + data = load_agent_output(repo_path, contract_role, identifier=identifier) if not data: return None @@ -193,6 +197,7 @@ def load_agent_output_data( def collect_handoff_data( repo_path: Path, target_role: AgentRole, + identifier: int | str | None = None, ) -> dict[str, HandoffData]: """Collect handoff data for a target agent. @@ -201,6 +206,7 @@ def collect_handoff_data( Args: repo_path: Path to repository target_role: Target agent role + identifier: Pipeline/issue identifier for namespaced paths. Returns: Dictionary mapping source role to handoff data @@ -212,7 +218,7 @@ def collect_handoff_data( for dep_role in role_def.dependencies: orch_role = REVERSE_ROLE_MAP[dep_role] - output = load_agent_output_data(repo_path, orch_role) + output = load_agent_output_data(repo_path, orch_role, identifier=identifier) if output and output.handoff_data: handoffs[orch_role.value] = HandoffData( @@ -227,17 +233,19 @@ def collect_handoff_data( def get_handoff_env_var( repo_path: Path, target_role: AgentRole, + identifier: int | str | None = None, ) -> str: """Get handoff data as a JSON string for environment variable. Args: repo_path: Path to repository target_role: Target agent role + identifier: Pipeline/issue identifier for namespaced paths. Returns: JSON string of handoff data """ - handoffs = collect_handoff_data(repo_path, target_role) + handoffs = collect_handoff_data(repo_path, target_role, identifier=identifier) data = {role: handoff.data for role, handoff in handoffs.items()} diff --git a/orchestrator/health_checks/tier1/phase_output.py b/orchestrator/health_checks/tier1/phase_output.py index e490fcc4c..82f0e4cb6 100644 --- a/orchestrator/health_checks/tier1/phase_output.py +++ b/orchestrator/health_checks/tier1/phase_output.py @@ -7,7 +7,7 @@ For each phase type, verifies: - implement: new commits on the remote branch beyond origin/main -- plan: architect-output.json (or plan draft) exists +- plan: {identifier}-architect-output.json (or plan draft) exists - refine: refine output exists Returns DEGRADED (not FAILED) when agents succeeded but artifacts diff --git a/orchestrator/routes/pipelines.py b/orchestrator/routes/pipelines.py index debb118b7..6390ac2c7 100644 --- a/orchestrator/routes/pipelines.py +++ b/orchestrator/routes/pipelines.py @@ -95,6 +95,18 @@ def get_logger(name: str, **kwargs) -> logging.Logger: # type: ignore[misc] # functions to adapt language when tester findings are present. TESTER_FINDINGS_HEADER = "### tester findings" + +def _pipeline_identifier( + issue_number: int | None, + pipeline_id: str, +) -> int | str: + """Derive the pipeline identifier used for namespaced .egg-state filenames. + + Prefers ``issue_number`` when available, falling back to ``pipeline_id``. + """ + return issue_number if issue_number is not None else pipeline_id + + # Network constants for sandbox container URLs try: from egg_config import ( @@ -1620,19 +1632,36 @@ def _read_review_verdict( return None -def _read_tester_gaps(repo_path: Path) -> str | None: +def _read_tester_gaps( + repo_path: Path, + identifier: int | str | None = None, +) -> str | None: """Read tester output and extract gap findings for feedback to the coder. - Reads `.egg-state/agent-outputs/tester-output.json` and formats any - test failures and gaps found into a summary string. + Reads `.egg-state/agent-outputs/{identifier}-tester-output.json` (with + fallback to `tester-output.json`) and formats any test failures and gaps + found into a summary string. Falls back to scanning the `summary` field for failure keywords when `gaps_found` is not present (backwards compat with old tester outputs). + Args: + repo_path: Path to the repository. + identifier: Pipeline/issue identifier for namespaced filenames. + Returns: Formatted gap summary string, or None if no gaps found. """ - tester_output_file = repo_path / ".egg-state" / "agent-outputs" / "tester-output.json" + outputs_dir = repo_path / ".egg-state" / "agent-outputs" + + # Try prefixed filename first, fall back to old global filename + tester_output_file = None + if identifier is not None: + prefixed = outputs_dir / f"{identifier}-tester-output.json" + if prefixed.exists(): + tester_output_file = prefixed + if tester_output_file is None: + tester_output_file = outputs_dir / "tester-output.json" if not tester_output_file.exists(): return None @@ -2197,8 +2226,9 @@ def _build_phase_prompt( "Focus on addressing the specific feedback above.\n", "1. Review the feedback in the **Prior Review Feedback** section above", "2. Check `git diff` to understand the current state of changes", - "3. Check `.egg-state/agent-outputs/tester-output.json` " - "for test failures and gaps", + f"3. Check `.egg-state/agent-outputs/" + f"{_pipeline_identifier(issue_number, pipeline_id)}" + f"-tester-output.json` for test failures and gaps", "4. Fix the specific issues raised", "5. Run tests to verify your fixes", "6. Commit with descriptive messages", @@ -2460,6 +2490,9 @@ def _build_agent_prompt( lines.append(review_feedback) lines.append("") + # Derive the pipeline identifier for namespaced output filenames. + _identifier = _pipeline_identifier(issue_number, pipeline_id) + # Role-specific instructions lines.append("## Your Task\n") @@ -2516,7 +2549,7 @@ def _build_agent_prompt( "3. Verify no regressions were introduced", "4. Produce an integration report", "", - "Write your integration report to `.egg-state/agent-outputs/integrator-output.json`.", + f"Write your integration report to `.egg-state/agent-outputs/{_identifier}-integrator-output.json`.", "", "Review pipeline overview and costs before integrating:", "`egg-checkpoint context --pipeline $EGG_PIPELINE_ID --files` and " @@ -2535,7 +2568,7 @@ def _build_agent_prompt( "4. Consider multiple implementation approaches", "5. Recommend an approach with justification and document technical decisions", "", - "Write your analysis to `.egg-state/agent-outputs/architect-output.json`.", + f"Write your analysis to `.egg-state/agent-outputs/{_identifier}-architect-output.json`.", "", ] ) @@ -2601,7 +2634,7 @@ def _build_agent_prompt( "4. Propose mitigation strategies and rollback plans", "5. Flag areas that need human review", "", - "Write your risk assessment to `.egg-state/agent-outputs/risk_analyst-output.json`.", + f"Write your risk assessment to `.egg-state/agent-outputs/{_identifier}-risk_analyst-output.json`.", "", ] ) @@ -2801,7 +2834,9 @@ def _build_phase_scoped_prompt( lines.append("### Revision Checklist\n") lines.append("- [ ] Review the feedback in **Prior Review Feedback** above") lines.append( - "- [ ] Check `.egg-state/agent-outputs/tester-output.json` for test failures and gaps" + f"- [ ] Check `.egg-state/agent-outputs/" + f"{_pipeline_identifier(pipeline.issue_number, pipeline_id)}" + f"-tester-output.json` for test failures and gaps" ) lines.append("- [ ] Fix the specific issues raised") lines.append("- [ ] Run tests to verify fixes") @@ -2878,9 +2913,7 @@ def _run_tier3_implement( ) pipeline_mode = pipeline.mode or "issue" - contract_key: int | str = ( - pipeline.issue_number if pipeline.issue_number is not None else pipeline_id - ) + contract_key = _pipeline_identifier(pipeline.issue_number, pipeline_id) # Ensure contract exists in the worktree. Agent git checkout in a # prior phase (e.g. `git checkout -b egg/... origin/main`) may have @@ -3154,7 +3187,10 @@ def _run_single_phase_cycle(phase_id: str) -> tuple[int, list[str]]: # Only read when tester succeeded — a failed tester may have left # stale output from a previous cycle on disk. if tester_exit == 0: - tester_gap_summary = _read_tester_gaps(worktree_repo_path) + tester_gap_summary = _read_tester_gaps( + worktree_repo_path, + identifier=_pipeline_identifier(pipeline.issue_number, pipeline_id), + ) if tester_gap_summary: logger.info( "Tester found gaps", @@ -3254,6 +3290,7 @@ def _run_single_phase_cycle(phase_id: str) -> tuple[int, list[str]]: repo=pipeline.repo, repo_checks=repo_checks, repo_path=str(worktree_repo_path), + issue_number=pipeline.issue_number, ) try: @@ -3740,9 +3777,7 @@ def spawn_fn(role: AgentRole, prompt_text: str, extra_env: dict[str, str]) -> tu # separately via save_agent_output(). from egg_contracts.loader import contract_exists, create_contract, create_local_contract - contract_key: int | str = ( - pipeline.issue_number if pipeline.issue_number is not None else pipeline_id - ) + contract_key = _pipeline_identifier(pipeline.issue_number, pipeline_id) if not contract_exists(contract_key, worktree_repo_path): logger.warning( "Contract missing from worktree, recreating for multi-agent phase", @@ -4022,6 +4057,7 @@ def _build_checker_prompt( pipeline_mode: str, repo: str | None = None, repo_checks: list[dict] | None = None, + issue_number: int | None = None, ) -> str: """Build a prompt for the checker agent that runs tests/lint. @@ -4030,13 +4066,14 @@ def _build_checker_prompt( autofixer into a single agent session to avoid context loss. The checker discovers and runs project test/lint commands, then - writes structured results to .egg-state/checks/implement-results.json. + writes structured results to .egg-state/checks/{identifier}-implement-results.json. Args: pipeline_id: Pipeline identifier. pipeline_mode: Pipeline mode (e.g. "local", "issue"). repo: Target repository in "owner/repo" format. repo_checks: Pre-configured check commands from repositories.yaml. + issue_number: GitHub issue number (used for namespaced filenames). """ lines = [ "You are the **checker** for the SDLC pipeline implement phase.\n", @@ -4076,9 +4113,12 @@ def _build_checker_prompt( ] ) + _ck_id = _pipeline_identifier(issue_number, pipeline_id) + results_filename = f"{_ck_id}-implement-results.json" + lines.extend( [ - "After running checks, **write results** to `.egg-state/checks/implement-results.json`:\n", + f"After running checks, **write results** to `.egg-state/checks/{results_filename}`:\n", "```json", "{", ' "all_passed": true/false,', @@ -4107,6 +4147,7 @@ def _build_autofix_prompt( check_results: dict, repo: str | None = None, repo_path: str | None = None, + issue_number: int | None = None, ) -> str: """Build a prompt for the autofixer agent. @@ -4148,10 +4189,11 @@ def _build_autofix_prompt( if repo: repo_name = repo.split("/")[-1] lines.append(f"Work in the `~/repos/{repo_name}` directory.\n") + _af_id = _pipeline_identifier(issue_number, pipeline_id) lines.extend( [ "1. **Investigate the failures above**: The full check output is included — " - "do NOT re-read `.egg-state/checks/implement-results.json`", + f"do NOT re-read `.egg-state/checks/{_af_id}-implement-results.json`", "2. **Fix without committing yet**: For each auto-fixable issue " "(lint errors, formatting, simple type errors, obvious test fixes), make the fix", "3. **Verify locally**: Run the same checks again to confirm fixes work", @@ -4195,6 +4237,7 @@ def _build_check_and_fix_prompt( repo: str | None = None, repo_checks: list[dict] | None = None, repo_path: str | None = None, + issue_number: int | None = None, ) -> str: """Build a combined check-and-fix prompt for a single agent session. @@ -4208,6 +4251,7 @@ def _build_check_and_fix_prompt( repo: Target repository in "owner/repo" format. repo_checks: Pre-configured check commands from repositories.yaml. repo_path: Filesystem path to repository (for loading autofixer rules). + issue_number: GitHub issue number (used for namespaced filenames). """ lines = [ "You are the **checker and autofixer** for the SDLC pipeline implement phase.\n", @@ -4290,7 +4334,7 @@ def _build_check_and_fix_prompt( "", "### Results File\n", "After the final check run, write results to " - "`.egg-state/checks/implement-results.json`:\n", + f"`.egg-state/checks/{_pipeline_identifier(issue_number, pipeline_id)}-implement-results.json`:\n", "```json", "{", ' "all_passed": true/false,', @@ -4358,6 +4402,9 @@ def _synthesize_plan_draft( ) return + # Derive the pipeline identifier for namespaced output filenames. + _synth_id = _pipeline_identifier(issue_number, pipeline_id) + sections: list[str] = [] agent_files = [ ("architect-output.json", "Architecture Analysis"), @@ -4365,7 +4412,12 @@ def _synthesize_plan_draft( ] for filename, heading in agent_files: - output_file = outputs_dir / filename + # Try prefixed filename first, fall back to old global filename + prefixed_file = outputs_dir / f"{_synth_id}-{filename}" + if prefixed_file.exists(): + output_file = prefixed_file + else: + output_file = outputs_dir / filename if not output_file.exists(): continue try: @@ -5136,6 +5188,7 @@ def _run_pipeline(pipeline_id: str, repo_path: Path) -> None: repo=pipeline.repo, repo_checks=repo_checks, repo_path=str(worktree_repo_path), + issue_number=pipeline.issue_number, ) check_fix_command = [ "claude", @@ -5188,7 +5241,10 @@ def _run_pipeline(pipeline_id: str, repo_path: Path) -> None: # Only read when the phase succeeded — a failed phase may # have left stale output from a previous cycle on disk. if use_multi_agent and not phase_failed: - tester_gap_summary = _read_tester_gaps(worktree_repo_path) + tester_gap_summary = _read_tester_gaps( + worktree_repo_path, + identifier=_pipeline_identifier(pipeline.issue_number, pipeline_id), + ) if tester_gap_summary: logger.info( "Tester found gaps", diff --git a/orchestrator/routes/signals.py b/orchestrator/routes/signals.py index 61612cef7..f1e5aa769 100644 --- a/orchestrator/routes/signals.py +++ b/orchestrator/routes/signals.py @@ -190,7 +190,11 @@ def handle_complete_signal( handoff_data=outputs, metrics=data.get("metrics", {}), ) - save_agent_output(contract_path, output) + # Derive pipeline identifier matching PipelineDispatcher.contract_key + identifier: int | str = ( + pipeline.issue_number if pipeline.issue_number is not None else pipeline_id + ) + save_agent_output(contract_path, output, identifier=identifier) logger.info( "Agent completed", diff --git a/orchestrator/tests/test_handoffs_namespaced.py b/orchestrator/tests/test_handoffs_namespaced.py new file mode 100644 index 000000000..c96dd5b4a --- /dev/null +++ b/orchestrator/tests/test_handoffs_namespaced.py @@ -0,0 +1,242 @@ +""" +Tests for orchestrator/handoffs.py identifier-namespaced wrappers. + +These tests verify that the handoffs module correctly forwards the +``identifier`` parameter to the underlying egg_contracts functions, +covering save_agent_output, load_agent_output_data, collect_handoff_data, +and get_handoff_env_var. +""" + +import json +from pathlib import Path + +from handoffs import ( + AgentOutput, + collect_handoff_data, + get_handoff_env_var, + load_agent_output_data, + save_agent_output, +) +from models import AgentRole + + +class TestSaveAgentOutputWrapper: + """Tests for handoffs.save_agent_output with identifier.""" + + def test_save_with_identifier_creates_prefixed_file(self, tmp_path: Path): + """Wrapper saves to {identifier}-{role}-output.json.""" + output = AgentOutput( + role=AgentRole.CODER, + commit="abc123", + files_changed=["file.py"], + handoff_data={"changed_files": ["file.py"]}, + ) + + path = save_agent_output(tmp_path, output, identifier=871) + assert path.name == "871-coder-output.json" + data = json.loads(path.read_text()) + assert data["role"] == "coder" + assert data["commit"] == "abc123" + + def test_save_without_identifier_creates_global_file(self, tmp_path: Path): + """Wrapper saves to {role}-output.json when identifier is None.""" + output = AgentOutput( + role=AgentRole.TESTER, + handoff_data={"tests_passed": 10}, + ) + + path = save_agent_output(tmp_path, output) + assert path.name == "tester-output.json" + + +class TestLoadAgentOutputDataWrapper: + """Tests for handoffs.load_agent_output_data with identifier.""" + + def test_load_with_identifier_reads_prefixed(self, tmp_path: Path): + """Loads from prefixed file when identifier provided.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc123", + "files_changed": ["a.py"], + "handoff_data": {"files": ["a.py"]}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = load_agent_output_data(tmp_path, AgentRole.CODER, identifier=871) + assert result is not None + assert result.role == AgentRole.CODER + assert result.commit == "abc123" + + def test_load_fallback_to_global(self, tmp_path: Path): + """Falls back to global file when prefixed not found.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "def456", + "files_changed": [], + "handoff_data": {}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = load_agent_output_data(tmp_path, AgentRole.CODER, identifier=999) + assert result is not None + assert result.commit == "def456" + + def test_load_returns_none_when_missing(self, tmp_path: Path): + """Returns None when no output file exists.""" + result = load_agent_output_data(tmp_path, AgentRole.CODER, identifier=871) + assert result is None + + +class TestCollectHandoffDataWrapper: + """Tests for handoffs.collect_handoff_data with identifier.""" + + def test_collect_with_identifier(self, tmp_path: Path): + """Collects handoff data from prefixed files.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc", + "files_changed": ["main.py"], + "handoff_data": {"changed_files": ["main.py"]}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = collect_handoff_data(tmp_path, AgentRole.TESTER, identifier=871) + assert "coder" in result + assert result["coder"].data == {"changed_files": ["main.py"]} + + def test_collect_without_identifier(self, tmp_path: Path): + """Collects from global files when identifier is None.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc", + "files_changed": [], + "handoff_data": {"changed_files": ["old.py"]}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = collect_handoff_data(tmp_path, AgentRole.TESTER) + assert "coder" in result + assert result["coder"].data == {"changed_files": ["old.py"]} + + def test_collect_empty_handoff_data_excluded(self, tmp_path: Path): + """Agent outputs with empty handoff_data are excluded.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc", + "files_changed": [], + "handoff_data": {}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = collect_handoff_data(tmp_path, AgentRole.TESTER, identifier=871) + assert "coder" not in result + + +class TestGetHandoffEnvVarWrapper: + """Tests for handoffs.get_handoff_env_var with identifier.""" + + def test_env_var_with_identifier(self, tmp_path: Path): + """Returns JSON string with handoff data from prefixed files.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc", + "files_changed": ["x.py"], + "handoff_data": {"key": "value"}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = get_handoff_env_var(tmp_path, AgentRole.TESTER, identifier=871) + parsed = json.loads(result) + assert "coder" in parsed + assert parsed["coder"]["key"] == "value" + + def test_env_var_without_identifier(self, tmp_path: Path): + """Returns JSON string from global files when identifier is None.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text( + json.dumps( + { + "role": "coder", + "commit": "abc", + "files_changed": [], + "handoff_data": {"legacy": True}, + "logs": None, + "metrics": {}, + "timestamp": "2026-01-01T00:00:00", + } + ) + ) + + result = get_handoff_env_var(tmp_path, AgentRole.TESTER) + parsed = json.loads(result) + assert "coder" in parsed + assert parsed["coder"]["legacy"] is True + + def test_env_var_empty_when_no_deps(self, tmp_path: Path): + """Returns empty JSON object for role with no dependencies.""" + result = get_handoff_env_var(tmp_path, AgentRole.CODER, identifier=871) + assert json.loads(result) == {} + + def test_round_trip_save_then_env_var(self, tmp_path: Path): + """Save output, then collect via env var — full round-trip.""" + output = AgentOutput( + role=AgentRole.CODER, + commit="abc", + files_changed=["test.py"], + handoff_data={"changed_files": ["test.py"], "summary": "Added tests"}, + ) + save_agent_output(tmp_path, output, identifier=42) + + result = get_handoff_env_var(tmp_path, AgentRole.TESTER, identifier=42) + parsed = json.loads(result) + assert "coder" in parsed + assert parsed["coder"]["changed_files"] == ["test.py"] diff --git a/orchestrator/tests/test_pipeline_prompts.py b/orchestrator/tests/test_pipeline_prompts.py index 8f08a4256..2c7504a3c 100644 --- a/orchestrator/tests/test_pipeline_prompts.py +++ b/orchestrator/tests/test_pipeline_prompts.py @@ -30,6 +30,7 @@ _read_tester_gaps, _render_contract_tasks, _summarize_issue, + _synthesize_plan_draft, ) @@ -72,15 +73,20 @@ def test_explicit_checks_without_repo(self): assert "Repository:" not in result def test_always_includes_results_format(self): - """Both modes include the results JSON format.""" + """Both modes include the namespaced results JSON format.""" checks = [{"name": "test", "command": "pytest"}] for prompt in [ _build_checker_prompt("pid-1", "local"), _build_checker_prompt("pid-1", "local", repo_checks=checks), ]: - assert "implement-results.json" in prompt + assert "pid-1-implement-results.json" in prompt assert "all_passed" in prompt + def test_results_filename_uses_issue_number(self): + """Results filename uses issue_number when provided.""" + result = _build_checker_prompt("pid-1", "issue", issue_number=871) + assert "871-implement-results.json" in result + def test_includes_pipeline_metadata(self): """Prompt always includes pipeline ID and mode.""" result = _build_checker_prompt("pid-42", "issue") @@ -589,11 +595,16 @@ def test_includes_fix_rules(self): assert "Auto-fixable" in result or "auto-fixable" in result.lower() def test_includes_results_file_format(self): - """Includes the results JSON format.""" + """Includes the namespaced results JSON format.""" result = _build_check_and_fix_prompt("pid-1", "local") - assert "implement-results.json" in result + assert "pid-1-implement-results.json" in result assert "all_passed" in result + def test_results_filename_uses_issue_number(self): + """Results filename uses issue_number when provided.""" + result = _build_check_and_fix_prompt("pid-1", "issue", issue_number=871) + assert "871-implement-results.json" in result + def test_includes_repeat_workflow(self): """Includes repeat-up-to-3-times workflow.""" result = _build_check_and_fix_prompt("pid-1", "local") @@ -1559,6 +1570,58 @@ def test_no_issue_number_omits_issue_line(self): assert "Issue: #" not in result +class TestNamespacedOutputFilenames: + """Tests for namespaced (identifier-prefixed) output filenames in prompts.""" + + def test_architect_prompt_uses_issue_number(self): + """Architect prompt references {issue_number}-architect-output.json.""" + result = _build_agent_prompt( + role_value="architect", + phase="plan", + pipeline_id="issue-871", + pipeline_mode="issue", + prompt="# Feature", + issue_number=871, + ) + assert "871-architect-output.json" in result + + def test_risk_analyst_prompt_uses_issue_number(self): + """Risk analyst prompt references {issue_number}-risk_analyst-output.json.""" + result = _build_agent_prompt( + role_value="risk_analyst", + phase="plan", + pipeline_id="issue-871", + pipeline_mode="issue", + prompt="# Feature", + issue_number=871, + ) + assert "871-risk_analyst-output.json" in result + + def test_integrator_prompt_uses_issue_number(self): + """Integrator prompt references {issue_number}-integrator-output.json.""" + result = _build_agent_prompt( + role_value="integrator", + phase="implement", + pipeline_id="issue-871", + pipeline_mode="issue", + prompt="# Feature", + issue_number=871, + ) + assert "871-integrator-output.json" in result + + def test_prompt_falls_back_to_pipeline_id(self): + """Without issue_number, prompt uses pipeline_id as identifier.""" + result = _build_agent_prompt( + role_value="architect", + phase="plan", + pipeline_id="local-abc123", + pipeline_mode="local", + prompt="# Feature", + issue_number=None, + ) + assert "local-abc123-architect-output.json" in result + + class TestBuildPhaseScopedPromptEdgeCases: """Edge cases for _build_phase_scoped_prompt() plan overview behavior.""" @@ -1742,6 +1805,34 @@ def test_with_gaps_and_failures(self, tmp_path): assert "No error handling for invalid input" in result assert "Missing boundary check in parse()" in result + def test_prefixed_file_preferred(self, tmp_path): + """Prefixed file is preferred over global file when identifier given.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "tester-output.json").write_text( + json.dumps({"tests_failed": 1, "gaps_found": ["old-gap"]}) + ) + (outputs_dir / "871-tester-output.json").write_text( + json.dumps({"tests_failed": 3, "gaps_found": ["prefixed-gap"]}) + ) + + result = _read_tester_gaps(tmp_path, identifier=871) + assert result is not None + assert "prefixed-gap" in result + assert "old-gap" not in result + + def test_fallback_to_global_with_identifier(self, tmp_path): + """Falls back to global path when prefixed file does not exist.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "tester-output.json").write_text( + json.dumps({"tests_failed": 1, "gaps_found": ["global-gap"]}) + ) + + result = _read_tester_gaps(tmp_path, identifier=871) + assert result is not None + assert "global-gap" in result + def test_no_gaps_returns_none(self, tmp_path): """Returns None when no gaps or failures found.""" outputs_dir = tmp_path / ".egg-state" / "agent-outputs" @@ -2027,3 +2118,253 @@ def test_phase_prompt_prior_feedback_header_with_tester_findings(self): assert "The reviewer and tester found issues with your previous work" in result # Should NOT contain the reviewer-only language assert "found issues with your previous draft" not in result + + +class TestReadTesterGapsNamespacedEdgeCases: + """Additional edge-case tests for _read_tester_gaps with identifier.""" + + def test_no_identifier_uses_global_ignores_prefixed(self, tmp_path): + """When identifier=None, only global file is used even if prefixed exists.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-tester-output.json").write_text( + json.dumps({"tests_failed": 5, "gaps_found": ["prefixed-gap"]}) + ) + (outputs_dir / "tester-output.json").write_text( + json.dumps({"tests_failed": 1, "gaps_found": ["global-gap"]}) + ) + + result = _read_tester_gaps(tmp_path, identifier=None) + assert result is not None + assert "global-gap" in result + assert "prefixed-gap" not in result + + def test_neither_file_with_identifier_returns_none(self, tmp_path): + """Returns None when identifier given but neither file exists.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + # No files at all + + result = _read_tester_gaps(tmp_path, identifier=871) + assert result is None + + def test_corrupted_prefixed_falls_back_to_none(self, tmp_path): + """Corrupted prefixed file returns None (no fallback to global in this function).""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + # Prefixed file is corrupt + (outputs_dir / "871-tester-output.json").write_text("NOT JSON{{{") + + # _read_tester_gaps selects the prefixed file because it exists, + # then fails to parse → returns None. + result = _read_tester_gaps(tmp_path, identifier=871) + assert result is None + + def test_string_identifier_prefixed_file(self, tmp_path): + """String identifiers work for _read_tester_gaps.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "local-xyz-tester-output.json").write_text( + json.dumps({"tests_failed": 2, "gaps_found": ["gap-a"]}) + ) + + result = _read_tester_gaps(tmp_path, identifier="local-xyz") + assert result is not None + assert "gap-a" in result + + +class TestAutofixPromptNamespaced: + """Tests for _build_autofix_prompt with namespaced results filename.""" + + def test_uses_issue_number_in_results_filename(self): + """Results filename references issue_number when provided.""" + result = _build_autofix_prompt( + pipeline_id="issue-871", + pipeline_mode="issue", + check_results={"checks": [{"name": "pytest", "passed": False, "output": "fail"}]}, + issue_number=871, + ) + assert "871-implement-results.json" in result + + def test_falls_back_to_pipeline_id(self): + """Falls back to pipeline_id when issue_number is None.""" + result = _build_autofix_prompt( + pipeline_id="local-abc", + pipeline_mode="local", + check_results={"checks": []}, + issue_number=None, + ) + assert "local-abc-implement-results.json" in result + + +class TestSynthesizePlanDraftNamespaced: + """Tests for _synthesize_plan_draft with namespaced agent output filenames.""" + + def test_reads_prefixed_agent_outputs(self, tmp_path): + """Reads {identifier}-architect-output.json when present.""" + # Set up draft path structure + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-architect-output.json").write_text( + json.dumps({"content": "Architecture analysis for issue 871"}) + ) + (outputs_dir / "871-risk_analyst-output.json").write_text( + json.dumps({"content": "Risk assessment for issue 871"}) + ) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "871-plan.md" + assert draft_path.exists() + content = draft_path.read_text() + assert "Architecture analysis for issue 871" in content + assert "Risk assessment for issue 871" in content + + def test_falls_back_to_global_agent_outputs(self, tmp_path): + """Falls back to global filenames when prefixed files missing.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + # Only global files exist — content must exceed _MIN_PLAN_DRAFT_CONTENT_LENGTH (50) + long_content = "Global architecture analysis with detailed design decisions and component interactions for the feature" + (outputs_dir / "architect-output.json").write_text(json.dumps({"content": long_content})) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "871-plan.md" + assert draft_path.exists() + content = draft_path.read_text() + assert long_content in content + + def test_prefixed_preferred_over_global(self, tmp_path): + """When both prefixed and global exist, prefixed wins.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "architect-output.json").write_text( + json.dumps( + { + "content": "Old global architecture output that should be ignored when prefixed version is available" + } + ) + ) + (outputs_dir / "871-architect-output.json").write_text( + json.dumps( + { + "content": "New prefixed architecture output with detailed design decisions and component interactions" + } + ) + ) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "871-plan.md" + assert draft_path.exists() + content = draft_path.read_text() + assert "New prefixed architecture output" in content + assert "Old global architecture output" not in content + + def test_does_not_overwrite_existing_draft(self, tmp_path): + """Existing draft is not overwritten by synthesis.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + draft_path = drafts_dir / "871-plan.md" + draft_path.write_text("Existing plan from task_planner") + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-architect-output.json").write_text( + json.dumps({"content": "Architecture from architect"}) + ) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + assert draft_path.read_text() == "Existing plan from task_planner" + + def test_local_mode_uses_pipeline_id(self, tmp_path): + """In local mode, uses pipeline_id for both draft path and output lookup.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + long_content = "Local mode architecture analysis with detailed design decisions, component interactions, and implementation strategy" + (outputs_dir / "local-abc-architect-output.json").write_text( + json.dumps({"content": long_content}) + ) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="local-abc", + pipeline_mode="local", + issue_number=None, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "local-abc-plan.md" + assert draft_path.exists() + content = draft_path.read_text() + assert long_content in content + + def test_no_outputs_dir_no_crash(self, tmp_path): + """No agent-outputs directory does not crash.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + # No agent-outputs directory at all + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "871-plan.md" + assert not draft_path.exists() + + def test_empty_outputs_produce_no_draft(self, tmp_path): + """Agent outputs with empty content do not produce a draft.""" + drafts_dir = tmp_path / ".egg-state" / "drafts" + drafts_dir.mkdir(parents=True) + + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-architect-output.json").write_text(json.dumps({"content": ""})) + (outputs_dir / "871-risk_analyst-output.json").write_text(json.dumps({"content": " "})) + + _synthesize_plan_draft( + repo_path=tmp_path, + pipeline_id="issue-871", + pipeline_mode="issue", + issue_number=871, + ) + + draft_path = tmp_path / ".egg-state" / "drafts" / "871-plan.md" + # No meaningful content → draft not written + assert not draft_path.exists() diff --git a/sandbox/.claude/commands/coder-mode.md b/sandbox/.claude/commands/coder-mode.md index 0218f3233..1bb1ea383 100644 --- a/sandbox/.claude/commands/coder-mode.md +++ b/sandbox/.claude/commands/coder-mode.md @@ -41,9 +41,13 @@ egg-contract show Create this file when done: +Output filenames are prefixed with the issue number or pipeline ID +(e.g., `871-coder-output.json` for issue #871): + ```bash mkdir -p .egg-state/agent-outputs -cat > .egg-state/agent-outputs/coder-output.json << 'EOF' +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat > ".egg-state/agent-outputs/${IDENT}-coder-output.json" << 'EOF' { "changed_files": [ "path/to/file1.py", diff --git a/sandbox/.claude/commands/documenter-mode.md b/sandbox/.claude/commands/documenter-mode.md index 2aa26a03e..a2ef482c2 100644 --- a/sandbox/.claude/commands/documenter-mode.md +++ b/sandbox/.claude/commands/documenter-mode.md @@ -18,7 +18,7 @@ You are the **Documenter** agent in a multi-agent SDLC pipeline. This mode activ ## Workflow -1. **Read Coder handoff**: Check `.egg-state/agent-outputs/coder-output.json` +1. **Read Coder handoff**: Check `.egg-state/agent-outputs/{identifier}-coder-output.json` 2. **Analyze changes**: Understand what was implemented 3. **Identify docs to update**: Find relevant documentation 4. **Update documentation**: Keep docs accurate and helpful @@ -26,9 +26,13 @@ You are the **Documenter** agent in a multi-agent SDLC pipeline. This mode activ ## Reading Coder Output +Output filenames are prefixed with the issue number or pipeline ID +(e.g., `871-coder-output.json` for issue #871): + ```bash # Read the coder's handoff -cat .egg-state/agent-outputs/coder-output.json +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat ".egg-state/agent-outputs/${IDENT}-coder-output.json" # This gives you: # - changed_files: List of files the coder modified @@ -65,9 +69,12 @@ cat .egg-state/agent-outputs/coder-output.json Create this file when done: +Output filenames are prefixed with the issue number or pipeline ID: + ```bash mkdir -p .egg-state/agent-outputs -cat > .egg-state/agent-outputs/documenter-output.json << 'EOF' +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat > ".egg-state/agent-outputs/${IDENT}-documenter-output.json" << 'EOF' { "doc_files": [ "docs/guides/feature.md", diff --git a/sandbox/.claude/commands/integrator-mode.md b/sandbox/.claude/commands/integrator-mode.md index 0d2790c25..5d4129e52 100644 --- a/sandbox/.claude/commands/integrator-mode.md +++ b/sandbox/.claude/commands/integrator-mode.md @@ -26,15 +26,20 @@ The Integrator is read-only for the codebase. You validate but do not modify. ## Reading Agent Outputs +Output filenames are prefixed with the issue number or pipeline ID +(e.g., `871-coder-output.json` for issue #871): + ```bash +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" + # Read coder output -cat .egg-state/agent-outputs/coder-output.json +cat ".egg-state/agent-outputs/${IDENT}-coder-output.json" # Read tester output -cat .egg-state/agent-outputs/tester-output.json +cat ".egg-state/agent-outputs/${IDENT}-tester-output.json" # Read documenter output (if available) -cat .egg-state/agent-outputs/documenter-output.json +cat ".egg-state/agent-outputs/${IDENT}-documenter-output.json" ``` ## Validation Steps @@ -78,9 +83,12 @@ npm run lint Create this file when done: +Output filenames are prefixed with the issue number or pipeline ID: + ```bash mkdir -p .egg-state/agent-outputs -cat > .egg-state/agent-outputs/integrator-output.json << 'EOF' +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat > ".egg-state/agent-outputs/${IDENT}-integrator-output.json" << 'EOF' { "status": "success", "tests": { diff --git a/sandbox/.claude/commands/tester-mode.md b/sandbox/.claude/commands/tester-mode.md index cd223c5cd..47e9b1414 100644 --- a/sandbox/.claude/commands/tester-mode.md +++ b/sandbox/.claude/commands/tester-mode.md @@ -18,7 +18,7 @@ You are the **Tester** agent in a multi-agent SDLC pipeline. This mode activates ## Workflow -1. **Read Coder handoff**: Check `.egg-state/agent-outputs/coder-output.json` +1. **Read Coder handoff**: Check `.egg-state/agent-outputs/{identifier}-coder-output.json` 2. **Analyze changes**: Understand what was implemented 3. **Identify gaps**: Look for missing error handling, boundary conditions, uncovered branches, and integration gaps 4. **Write tests**: Cover new functionality, edge cases, and identified gaps @@ -28,9 +28,13 @@ You are the **Tester** agent in a multi-agent SDLC pipeline. This mode activates ## Reading Coder Output +Output filenames are prefixed with the issue number or pipeline ID +(e.g., `871-coder-output.json` for issue #871): + ```bash # Read the coder's handoff -cat .egg-state/agent-outputs/coder-output.json +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat ".egg-state/agent-outputs/${IDENT}-coder-output.json" # This gives you: # - changed_files: List of files the coder modified @@ -59,9 +63,12 @@ When reviewing the coder's implementation, actively look for: Create this file when done: +Output filenames are prefixed with the issue number or pipeline ID: + ```bash mkdir -p .egg-state/agent-outputs -cat > .egg-state/agent-outputs/tester-output.json << 'EOF' +IDENT="${EGG_ISSUE_NUMBER:-$EGG_PIPELINE_ID}" +cat > ".egg-state/agent-outputs/${IDENT}-tester-output.json" << 'EOF' { "test_files": [ "tests/test_new_feature.py", diff --git a/shared/egg_contracts/orchestrator.py b/shared/egg_contracts/orchestrator.py index af5e036ed..3bbc922e0 100644 --- a/shared/egg_contracts/orchestrator.py +++ b/shared/egg_contracts/orchestrator.py @@ -320,24 +320,44 @@ def get_dispatch_for_contract(contract: Contract) -> DispatchDecision: return orchestrator.get_next_dispatch() -def load_agent_output(repo_path: Path, role: AgentRole) -> dict[str, Any]: +def load_agent_output( + repo_path: Path, + role: AgentRole, + identifier: int | str | None = None, +) -> dict[str, Any]: """Load the handoff output from an agent. Args: repo_path: Path to the repository role: The agent role + identifier: Pipeline/issue identifier for namespaced paths. + When provided, looks for ``{identifier}-{role}-output.json`` + first, falling back to the old ``{role}-output.json`` path. + When ``None``, uses the old path directly (backward compat). Returns: Parsed output data (empty dict if not found) """ - output_file = repo_path / ".egg-state" / "agent-outputs" / f"{role.value}-output.json" + output_dir = repo_path / ".egg-state" / "agent-outputs" - if not output_file.exists(): + if identifier is not None: + prefixed_file = output_dir / f"{identifier}-{role.value}-output.json" + if prefixed_file.exists(): + try: + with prefixed_file.open() as f: + result: dict[str, Any] = json.load(f) + return result + except (json.JSONDecodeError, OSError): + return {} + # Fall back to old global path + global_file = output_dir / f"{role.value}-output.json" + + if not global_file.exists(): return {} try: - with output_file.open() as f: - result: dict[str, Any] = json.load(f) + with global_file.open() as f: + result = json.load(f) return result except (json.JSONDecodeError, OSError): return {} @@ -347,6 +367,7 @@ def save_agent_output( repo_path: Path, role: AgentRole, outputs: dict[str, Any], + identifier: int | str | None = None, ) -> Path: """Save handoff output for an agent. @@ -354,6 +375,10 @@ def save_agent_output( repo_path: Path to the repository role: The agent role outputs: The output data to save + identifier: Pipeline/issue identifier for namespaced paths. + When provided, writes to ``{identifier}-{role}-output.json``. + When ``None``, writes to the old ``{role}-output.json`` path + (backward compat). Returns: Path to the saved file @@ -361,7 +386,10 @@ def save_agent_output( output_dir = repo_path / ".egg-state" / "agent-outputs" output_dir.mkdir(parents=True, exist_ok=True) - output_file = output_dir / f"{role.value}-output.json" + if identifier is not None: + output_file = output_dir / f"{identifier}-{role.value}-output.json" + else: + output_file = output_dir / f"{role.value}-output.json" with output_file.open("w") as f: json.dump(outputs, f, indent=2) @@ -372,6 +400,7 @@ def save_agent_output( def collect_handoff_data( repo_path: Path, target_role: AgentRole, + identifier: int | str | None = None, ) -> dict[str, Any]: """Collect all handoff data for a target agent. @@ -381,6 +410,8 @@ def collect_handoff_data( Args: repo_path: Path to the repository target_role: The agent role to collect data for + identifier: Pipeline/issue identifier for namespaced paths. + Forwarded to :func:`load_agent_output` for each dependency. Returns: Combined handoff data from dependencies @@ -389,7 +420,7 @@ def collect_handoff_data( handoff = {} for dep_role in role_def.dependencies: - dep_output = load_agent_output(repo_path, dep_role) + dep_output = load_agent_output(repo_path, dep_role, identifier=identifier) if dep_output: handoff[dep_role.value] = dep_output diff --git a/shared/egg_contracts/tests/test_orchestrator.py b/shared/egg_contracts/tests/test_orchestrator.py new file mode 100644 index 000000000..14e35ab02 --- /dev/null +++ b/shared/egg_contracts/tests/test_orchestrator.py @@ -0,0 +1,235 @@ +""" +Tests for load_agent_output / save_agent_output identifier-prefixed paths. + +Verifies the backward-compat fallback behavior: +1. Prefixed path exists → used +2. Only old global path exists → fallback reads it +3. Neither exists → empty dict returned +4. Both exist → prefixed path takes priority +""" + +import json +from pathlib import Path + +from egg_contracts.agent_roles import AgentRole +from egg_contracts.orchestrator import ( + collect_handoff_data, + load_agent_output, + save_agent_output, +) + + +class TestLoadAgentOutputIdentifier: + """Tests for load_agent_output with identifier parameter.""" + + def test_prefixed_path_used(self, tmp_path: Path): + """When prefixed path exists, it is returned.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text(json.dumps({"key": "prefixed"})) + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {"key": "prefixed"} + + def test_fallback_to_global_path(self, tmp_path: Path): + """When only old global path exists, fallback reads it.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text(json.dumps({"key": "global"})) + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {"key": "global"} + + def test_neither_exists_returns_empty(self, tmp_path: Path): + """When neither file exists, returns empty dict.""" + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {} + + def test_prefixed_takes_priority(self, tmp_path: Path): + """When both prefixed and global exist, prefixed takes priority.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text(json.dumps({"key": "global"})) + (outputs_dir / "871-coder-output.json").write_text(json.dumps({"key": "prefixed"})) + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {"key": "prefixed"} + + def test_no_identifier_uses_global(self, tmp_path: Path): + """When identifier is None, uses global path directly.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text(json.dumps({"key": "global"})) + + result = load_agent_output(tmp_path, AgentRole.CODER) + assert result == {"key": "global"} + + def test_string_identifier(self, tmp_path: Path): + """String identifiers (e.g. local pipeline IDs) work.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "local-abc123-coder-output.json").write_text(json.dumps({"key": "local"})) + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier="local-abc123") + assert result == {"key": "local"} + + +class TestSaveAgentOutputIdentifier: + """Tests for save_agent_output with identifier parameter.""" + + def test_save_with_identifier(self, tmp_path: Path): + """Saves to prefixed path when identifier provided.""" + path = save_agent_output(tmp_path, AgentRole.CODER, {"key": "value"}, identifier=871) + assert path.name == "871-coder-output.json" + assert json.loads(path.read_text()) == {"key": "value"} + + def test_save_without_identifier(self, tmp_path: Path): + """Saves to global path when identifier is None.""" + path = save_agent_output(tmp_path, AgentRole.CODER, {"key": "value"}) + assert path.name == "coder-output.json" + assert json.loads(path.read_text()) == {"key": "value"} + + +class TestLoadAgentOutputErrorHandling: + """Edge-case and error-handling tests for load_agent_output.""" + + def test_corrupted_prefixed_file_returns_empty(self, tmp_path: Path): + """Corrupted prefixed file returns {} without falling through to global.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text("NOT VALID JSON{{{") + (outputs_dir / "coder-output.json").write_text(json.dumps({"key": "global"})) + + # The prefixed file exists but is corrupt — returns empty dict, + # does NOT fall through to global file. + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {} + + def test_corrupted_global_file_returns_empty(self, tmp_path: Path): + """Corrupted global file returns {} when no identifier provided.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text("{invalid json}") + + result = load_agent_output(tmp_path, AgentRole.CODER) + assert result == {} + + def test_empty_prefixed_file_returns_empty(self, tmp_path: Path): + """Empty prefixed file triggers JSONDecodeError, returns {}.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text("") + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=871) + assert result == {} + + def test_integer_zero_identifier(self, tmp_path: Path): + """Identifier of 0 (falsy int) is still treated as a valid identifier.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "0-coder-output.json").write_text(json.dumps({"key": "zero"})) + + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=0) + assert result == {"key": "zero"} + + def test_no_identifier_ignores_prefixed_file(self, tmp_path: Path): + """When identifier=None, prefixed files are ignored entirely.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text(json.dumps({"key": "prefixed"})) + + # No global file, identifier=None → empty dict (prefixed file ignored) + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=None) + assert result == {} + + def test_all_agent_roles_with_identifier(self, tmp_path: Path): + """All agent roles produce correctly-prefixed filenames.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + for role in [AgentRole.CODER, AgentRole.TESTER, AgentRole.DOCUMENTER, AgentRole.INTEGRATOR]: + (outputs_dir / f"42-{role.value}-output.json").write_text( + json.dumps({"role": role.value}) + ) + + for role in [AgentRole.CODER, AgentRole.TESTER, AgentRole.DOCUMENTER, AgentRole.INTEGRATOR]: + result = load_agent_output(tmp_path, role, identifier=42) + assert result == {"role": role.value} + + +class TestSaveAgentOutputEdgeCases: + """Edge-case tests for save_agent_output.""" + + def test_round_trip_with_identifier(self, tmp_path: Path): + """save then load with same identifier returns original data.""" + data = {"changed_files": ["a.py", "b.py"], "summary": "test changes"} + save_agent_output(tmp_path, AgentRole.TESTER, data, identifier=871) + result = load_agent_output(tmp_path, AgentRole.TESTER, identifier=871) + assert result == data + + def test_round_trip_without_identifier(self, tmp_path: Path): + """save then load without identifier returns original data.""" + data = {"changed_files": ["x.py"]} + save_agent_output(tmp_path, AgentRole.CODER, data) + result = load_agent_output(tmp_path, AgentRole.CODER) + assert result == data + + def test_save_creates_directory(self, tmp_path: Path): + """save_agent_output creates the output directory if missing.""" + path = save_agent_output(tmp_path, AgentRole.CODER, {"k": "v"}, identifier=99) + assert path.exists() + assert path.parent.name == "agent-outputs" + + def test_save_overwrites_existing(self, tmp_path: Path): + """Saving twice with the same identifier overwrites the file.""" + save_agent_output(tmp_path, AgentRole.CODER, {"v": 1}, identifier=10) + save_agent_output(tmp_path, AgentRole.CODER, {"v": 2}, identifier=10) + result = load_agent_output(tmp_path, AgentRole.CODER, identifier=10) + assert result == {"v": 2} + + def test_different_identifiers_coexist(self, tmp_path: Path): + """Files for different identifiers do not interfere.""" + save_agent_output(tmp_path, AgentRole.CODER, {"id": "a"}, identifier=100) + save_agent_output(tmp_path, AgentRole.CODER, {"id": "b"}, identifier=200) + + assert load_agent_output(tmp_path, AgentRole.CODER, identifier=100) == {"id": "a"} + assert load_agent_output(tmp_path, AgentRole.CODER, identifier=200) == {"id": "b"} + + +class TestCollectHandoffDataIdentifier: + """Tests for collect_handoff_data with identifier parameter.""" + + def test_collects_from_prefixed_paths(self, tmp_path: Path): + """collect_handoff_data forwards identifier to load_agent_output.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "871-coder-output.json").write_text(json.dumps({"files": ["main.py"]})) + + # TESTER depends on CODER + result = collect_handoff_data(tmp_path, AgentRole.TESTER, identifier=871) + assert "coder" in result + assert result["coder"]["files"] == ["main.py"] + + def test_collect_fallback_to_global(self, tmp_path: Path): + """collect_handoff_data falls back to global path when prefixed missing.""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text(json.dumps({"files": ["legacy.py"]})) + + result = collect_handoff_data(tmp_path, AgentRole.TESTER, identifier=999) + assert "coder" in result + assert result["coder"]["files"] == ["legacy.py"] + + def test_collect_no_deps_returns_empty(self, tmp_path: Path): + """Role with no dependencies returns empty dict.""" + # CODER has no dependencies + result = collect_handoff_data(tmp_path, AgentRole.CODER, identifier=871) + assert result == {} + + def test_collect_without_identifier(self, tmp_path: Path): + """collect_handoff_data works without identifier (backward compat).""" + outputs_dir = tmp_path / ".egg-state" / "agent-outputs" + outputs_dir.mkdir(parents=True) + (outputs_dir / "coder-output.json").write_text(json.dumps({"files": ["old.py"]})) + + result = collect_handoff_data(tmp_path, AgentRole.TESTER) + assert "coder" in result