diff --git a/flake.lock b/flake.lock index 79497ddd..d19f7376 100644 --- a/flake.lock +++ b/flake.lock @@ -149,11 +149,11 @@ ] }, "locked": { - "lastModified": 1770770348, - "narHash": "sha256-A2GzkmzdYvdgmMEu5yxW+xhossP+txrYb7RuzRaqhlg=", + "lastModified": 1771245058, + "narHash": "sha256-KD4AAqrQtSXFOLBd4vo+s8uaEaZMR38csgSZwklfwi4=", "owner": "pyproject-nix", "repo": "uv2nix", - "rev": "5d1b2cb4fe3158043fbafbbe2e46238abbc954b0", + "rev": "978264ee45387bcce80b9938493acfae648d7c92", "type": "github" }, "original": { diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 8b055de3..d6dcbb3a 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -263,6 +263,7 @@ async def _build_payload( self, outputs: dict[str, str | list[str]], project_root: Path, + notes: str | None = None, ) -> str: """Build the user prompt payload with output file contents. @@ -272,6 +273,7 @@ async def _build_payload( Args: outputs: Map of output names to file path(s) project_root: Project root path for reading files + notes: Optional notes from the agent about work done Returns: Formatted payload with output file contents or path listing @@ -297,6 +299,11 @@ async def _build_payload( parts.extend(output_sections) parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}") + if notes: + parts.append(f"{SECTION_SEPARATOR} AUTHOR NOTES {SECTION_SEPARATOR}") + parts.append(notes) + parts.append(f"{SECTION_SEPARATOR} END AUTHOR NOTES {SECTION_SEPARATOR}") + if not parts: return "[No files provided]" @@ -370,6 +377,8 @@ async def build_review_instructions_file( parts.append("") # Build outputs listing (uses self.max_inline_files to decide inline vs path-only) + # Notes are handled separately below in the "Author Notes" section, + # so we don't pass them to _build_payload here. 
payload = await self._build_payload(outputs, project_root) parts.append(payload) parts.append("") @@ -445,12 +454,12 @@ async def build_review_instructions_file( def compute_timeout(file_count: int) -> int: """Compute dynamic timeout based on number of files. - Base timeout is 120 seconds. For every file beyond the first 5, - add 30 seconds. Examples: - - 3 files -> 120s - - 5 files -> 120s - - 10 files -> 120 + 30*5 = 270s (4.5 min) - - 20 files -> 120 + 30*15 = 570s (9.5 min) + Base timeout is 240 seconds (4 minutes). For every file beyond + the first 5, add 30 seconds. Examples: + - 3 files -> 240s + - 5 files -> 240s + - 10 files -> 240 + 30*5 = 390s (6.5 min) + - 20 files -> 240 + 30*15 = 690s (11.5 min) Args: file_count: Total number of files being reviewed @@ -458,7 +467,7 @@ def compute_timeout(file_count: int) -> int: Returns: Timeout in seconds """ - return 120 + 30 * max(0, file_count - 5) + return 240 + 30 * max(0, file_count - 5) async def evaluate( self, @@ -502,7 +511,7 @@ async def evaluate( notes=notes, additional_review_guidance=additional_review_guidance, ) - payload = await self._build_payload(outputs, project_root) + payload = await self._build_payload(outputs, project_root, notes=notes) # Dynamic timeout: more files = more time for the reviewer file_count = len(self._flatten_output_paths(outputs)) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 8bb58a70..f77e380f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -54,9 +54,9 @@ steps: reviews: - run_each: job.yml quality_criteria: - "Intermediate Deliverables": "Does the job break out across the logical steps such that there are reviewable intermediate deliverables?" + "Intermediate Deliverables": "The job breaks out across logical steps with reviewable intermediate deliverables." "Reviews": | - Are there reviews defined for each step? 
Do particularly critical documents have their own reviews? + Reviews are defined for each step. Particularly critical documents have their own reviews. Note that the reviewers do not have transcript access, so if the criteria are about the conversation, then add a `.deepwork/tmp/[step_summary].md` step output file so the agent has a communication channel to the reviewer. @@ -78,13 +78,13 @@ steps: - run_each: step_instruction_files additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." quality_criteria: - "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" - "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" - "Output Examples": "Does the instruction file show what good output looks like? This can be either template examples, or negative examples of what not to do. Only required if the step has ouputs" - "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" - "Ask Structured Questions": "If this step gathers user input, do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically." - "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?" - "No Redundant Info": "Does the instruction file avoid duplicating information that belongs in the job.yml's common_job_info_provided_to_all_steps_at_runtime section? Shared context (project background, terminology, conventions) should be in common_job_info, not repeated in each step." + "Complete Instructions": "The instruction file is complete (no stubs or placeholders)." + "Specific & Actionable": "Instructions are tailored to the step's purpose, not generic." + "Output Examples": "The instruction file shows what good output looks like. 
This can be either template examples, or negative examples of what not to do. Only required if the step has outputs." + "Quality Criteria": "The instruction file defines quality criteria for its outputs." + "Ask Structured Questions": "If this step gathers user input, instructions explicitly use the phrase 'ask structured questions'. If the step has no user inputs, this criterion passes automatically." + "Prompt Engineering": "The instruction file follows Anthropic's best practices for prompt engineering." + "No Redundant Info": "The instruction file avoids duplicating information that belongs in the job.yml's common_job_info_provided_to_all_steps_at_runtime section. Shared context (project background, terminology, conventions) is in common_job_info, not repeated in each step." - id: test name: "Test the New Workflow" @@ -106,11 +106,11 @@ steps: reviews: - run_each: step quality_criteria: - "Workflow Invoked": "Was the new workflow actually run on the user's test case via MCP?" - "Output Critiqued": "Did the agent identify up to 3 top issues with the output?" - "User Feedback Gathered": "Did the agent ask the user about each issue and gather additional feedback?" - "Corrections Made": "Were all requested corrections applied to the output?" - "User Satisfied": "Did the user confirm the output meets their needs?" + "Workflow Invoked": "The new workflow was actually run on the user's test case via MCP." + "Output Critiqued": "The agent identified up to 3 top issues with the output." + "User Feedback Gathered": "The agent asked the user about each issue and gathered additional feedback." + "Corrections Made": "All requested corrections were applied to the output." + "User Satisfied": "The user confirmed the output meets their needs." - id: iterate name: "Iterate on Workflow Design" @@ -170,14 +170,14 @@ steps: reviews: - run_each: step quality_criteria: - "Conversation Analyzed": "Did the agent review the conversation for DeepWork job executions?" 
- "Confusion Identified": "Did the agent identify points of confusion, errors, or inefficiencies?" - "Instructions Improved": "Were job instructions updated to address identified issues?" - "Instructions Concise": "Are instructions free of redundancy and unnecessary verbosity?" - "Shared Content Extracted": "Is lengthy/duplicated content extracted into referenced files?" - "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?" - "File References Used": "Do AGENTS.md entries reference other files where appropriate?" - "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?" + "Conversation Analyzed": "The agent reviewed the conversation for DeepWork job executions." + "Confusion Identified": "The agent identified points of confusion, errors, or inefficiencies." + "Instructions Improved": "Job instructions were updated to address identified issues." + "Instructions Concise": "Instructions are free of redundancy and unnecessary verbosity." + "Shared Content Extracted": "Lengthy/duplicated content is extracted into referenced files." + "Bespoke Learnings Captured": "Run-specific learnings were added to AGENTS.md." + "File References Used": "AGENTS.md entries reference other files where appropriate." + "Working Folder Correct": "AGENTS.md is in the correct working folder for the job." - id: fix_settings name: "Fix Settings Files" @@ -193,15 +193,14 @@ steps: reviews: - run_each: step quality_criteria: - "DeepWork Skills Removed": "Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" - "Non-DeepWork Skills Preserved": "Are skills NOT matching DeepWork jobs left intact?" - "Stale make_new_job.sh Removed": "Are stale `Bash(...)` permissions referencing `.deepwork/jobs/deepwork_jobs/make_new_job.sh` removed?" - "Rules Hooks Removed": "Are all DeepWork Rules hooks and permissions removed?" - "Duplicate Hooks Removed": "Are duplicate hook entries consolidated or removed?" 
- "Hardcoded Paths Removed": "Are user-specific hardcoded paths (like `/Users/*/...`) removed?" - "Deprecated Commands Removed": "Are deprecated commands like `deepwork hook *` removed?" - "Valid JSON": "Is settings.json still valid JSON after modifications?" - "Backup Created": "Was a backup of the original settings created before modifications?" + "DeepWork Skills Removed": "`Skill(...)` entries matching jobs in `.deepwork/jobs/` are removed." + "Non-DeepWork Skills Preserved": "Skills NOT matching DeepWork jobs are left intact." + "Stale make_new_job.sh Removed": "Stale `Bash(...)` permissions referencing `.deepwork/jobs/deepwork_jobs/make_new_job.sh` are removed." + "Rules Hooks Removed": "All DeepWork Rules hooks and permissions are removed." + "Duplicate Hooks Removed": "Duplicate hook entries are consolidated or removed." + "Hardcoded Paths Removed": "User-specific hardcoded paths (like `/Users/*/...`) are removed." + "Deprecated Commands Removed": "Deprecated commands like `deepwork hook *` are removed." + "Backup Created": "A backup of the original settings was created before modifications." - id: fix_jobs name: "Fix Job Definitions" @@ -225,12 +224,12 @@ steps: - run_each: step additional_review_guidance: "Read the .claude/settings.json file for context on what settings were cleaned up in the prior step." quality_criteria: - "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?" - "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?" - "Removed Steps Cleaned": "Are references to removed steps (like `review_job_spec`) updated?" - "Orphaned Steps Fixed": "For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - "Promise Lines Removed": "Are deprecated `Quality Criteria Met` lines removed from step instruction .md files?" 
- "Valid YAML": "Are all job.yml files valid YAML?" + "Exposed Field Addressed": "`exposed: true` fields are removed or noted as deprecated." + "Stop Hooks Migrated": "`stop_hooks` are migrated to `hooks.after_agent` format." + "Removed Steps Cleaned": "References to removed steps (like `review_job_spec`) are updated." + "Orphaned Steps Fixed": "For jobs with no workflows, there is a single workflow (named after the job) containing all steps. For jobs with existing workflows, each orphan gets its own workflow (named after the step)." + "Promise Lines Removed": "Step instructions do not include anything about `Quality Criteria Met`." + "job.ymls are readable": "Calling `get_workflows` from the DeepWork tool shows all expected jobs. If any job is missing, its job.yml is likely invalid." - id: errata name: "Clean Up Errata" @@ -245,13 +244,9 @@ steps: - fix_jobs reviews: - run_each: step - additional_review_guidance: "Check the .deepwork/jobs/ directory and .claude/skills/ directory to verify the cleanup was done correctly." + additional_review_guidance: "You should do this in a small number of turns - tee up every data request you need in your first call. Do not invoke sub-agents." quality_criteria: - "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" - "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" - "Temp Files Cleaned": "Are `.deepwork/tmp/` contents cleaned appropriately?" - "Rules Folder Removed": "Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - "Rules Job Removed": "Is `.deepwork/jobs/deepwork_rules/` removed if present?" - "Config Version Updated": "Is `.deepwork/config.yml` using current version format?" - "DeepWork Re-installed": "Was `deepwork install` run after cleanup, and does it complete without errors?" - "Git Status Clean": "Are changes ready to be committed (no untracked garbage files)?" 
+ "Legacy Job Skills Removed": "Legacy skill folders for each job are removed from `.claude/skills/` and `.gemini/skills/`." + "Deepwork Skill Preserved": "The `deepwork` skill folder still exists in `.claude/skills/deepwork/`." + "Rules Folder Removed": "`.deepwork/rules/` folder is gone." + "Rules Job Removed": "`.deepwork/jobs/deepwork_rules/` is gone." diff --git a/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md index 87e65615..e0e93c28 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md @@ -150,16 +150,16 @@ reviews: # Content review - is the analysis sound? - run_each: final_report.md quality_criteria: - "Claims Cited": "Is every factual claim backed by a specific source or query from the dataroom?" - "Questions Answered": "Are all research questions from the scoping document addressed?" - "Depth": "Does the analysis go beyond surface-level observations to root causes or actionable insights?" + "Claims Cited": "Every factual claim is backed by a specific source or query from the dataroom." + "Questions Answered": "All research questions from the scoping document are addressed." + "Depth": "The analysis goes beyond surface-level observations to root causes or actionable insights." # Presentation review - is the output polished? - run_each: final_report.md quality_criteria: - "Readable Flow": "Does the document flow logically for someone reading it without prior context?" - "Audience Fit": "Is the language and detail level appropriate for the intended audience?" - "Visual Quality": "Do all charts, tables, and figures render correctly and add value?" + "Readable Flow": "The document flows logically for someone reading it without prior context." 
+ "Audience Fit": "The language and detail level are appropriate for the intended audience." + "Visual Quality": "All charts, tables, and figures render correctly and add value." ``` ### Capability Considerations diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index d870f1f8..cb4307ca 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -203,18 +203,18 @@ For final outputs, reviews let you make sure the output meets the user's expecta **Reviews format:** -Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to question): +Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to a statement describing the expected state after the step completes — NOT a question): ```yaml reviews: - run_each: step # Review all outputs together quality_criteria: - "Consistent Style": "Do all files follow the same structure?" - "Complete Coverage": "Are all required topics covered?" + "Consistent Style": "All files follow the same structure." + "Complete Coverage": "All required topics are covered." - run_each: report_files # Review each file in a 'files'-type output individually quality_criteria: - "Well Written": "Is the content clear and well-organized?" - "Data-Backed": "Are claims supported by data?" + "Well Written": "Content is clear and well-organized." + "Data-Backed": "Claims are supported by data." ``` **`run_each` options:** @@ -229,11 +229,11 @@ reviews: - run_each: report_files additional_review_guidance: "Read the comparison_matrix.md file for context on whether claims in the report are supported by the analysis data." quality_criteria: - "Data-Backed": "Are recommendations supported by the competitive analysis data?" + "Data-Backed": "Recommendations are supported by the competitive analysis data." 
- run_each: step_instruction_files additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." quality_criteria: - "Complete Instructions": "Is the instruction file complete?" + "Complete Instructions": "The instruction file is complete." ``` **When to use `additional_review_guidance`:** diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index 8fb07d10..be9a3415 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -224,15 +224,15 @@ steps: ### Step 7: Migrate `quality_criteria` to `reviews` -The flat `quality_criteria` field on steps has been replaced by the `reviews` array. Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to question. +The flat `quality_criteria` field on steps has been replaced by the `reviews` array. Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to a statement describing the expected state (not a question). **Before (deprecated):** ```yaml steps: - id: my_step quality_criteria: - - "**Complete**: Is the output complete?" - - "**Accurate**: Is the data accurate?" + - "**Complete**: The output is complete." + - "**Accurate**: The data is accurate." ``` **After (current format):** @@ -242,13 +242,13 @@ steps: reviews: - run_each: step quality_criteria: - "Complete": "Is the output complete?" - "Accurate": "Is the data accurate?" + "Complete": "The output is complete." + "Accurate": "The data is accurate." ``` **Migration rules:** -1. **Parse the old format**: Each string typically follows `**Name**: Question` format. Extract the name (bold text) as the map key and the question as the value. +1. **Parse the old format**: Each string typically follows `**Name**: Question/Statement` format. 
Extract the name (bold text) as the map key and convert the value to a statement of expected state (not a question). 2. **Choose `run_each`**: Default to `step` (reviews all outputs together). If the step has a single primary output, consider using that output name instead. 3. **For steps with no quality_criteria**: Use `reviews: []` 4. **Remove the old field**: Delete the `quality_criteria` array entirely after migration. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index f87d8365..e6771604 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -66,9 +66,9 @@ If a step in the job.yml has `reviews` defined, the generated instruction file s reviews: - run_each: research_notes.md quality_criteria: - "Sufficient Data": "Does each competitor have at least 3 data points?" - "Sources Cited": "Are sources cited for key claims?" - "Current Information": "Is the information current (within last year)?" + "Sufficient Data": "Each competitor has at least 3 data points." + "Sources Cited": "Sources are cited for key claims." + "Current Information": "Information is current (within last year)." ``` **The instruction file should include:** diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index 33a80c8a..0abdef3e 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -86,15 +86,15 @@ Review and update quality reviews in two places: reviews: - run_each: step quality_criteria: - "Formatted Correctly": "Is the report formatted correctly?" + "Formatted Correctly": "The report is formatted correctly." # After reviews: - run_each: report.md quality_criteria: - "Distinct Colors": "Does the report use distinct colors for each data series in charts?" 
- "Readable Tables": "Do tables have sufficient padding and font size for readability?" - "Clear Summary": "Is the executive summary understandable by non-technical readers?" + "Distinct Colors": "The report uses distinct colors for each data series in charts." + "Readable Tables": "Tables have sufficient padding and font size for readability." + "Clear Summary": "The executive summary is understandable by non-technical readers." ``` ### Step 5: Consider Alternative Tools diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index 0d9bc406..2136ed5a 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -88,7 +88,7 @@ For each generalizable learning: - Include helpful examples - Clarify ambiguous instructions - Update quality criteria if needed - - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, questions, or `run_each` targeting) + - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, statements, or `run_each` targeting) 3. **Keep instructions concise** - Avoid redundancy - don't repeat the same guidance in multiple places diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example index 5ba528ad..dcc71e92 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example @@ -54,9 +54,9 @@ steps: reviews: - run_each: research_notes.md quality_criteria: - "Sufficient Data": "Does each competitor have at least 3 data points?" - "Sources Cited": "Are sources cited for key claims?" 
- "Current Information": "Is the information current (within last year)?" + "Sufficient Data": "Each competitor has at least 3 data points." + "Sources Cited": "Sources are cited for key claims." + "Current Information": "Information is current (within last year)." - id: comparative_analysis name: "Comparative Analysis" @@ -92,5 +92,5 @@ steps: - run_each: positioning_report.md additional_review_guidance: "Read the comparison_matrix.md file to verify that recommendations are grounded in the competitive analysis data." quality_criteria: - "Actionable": "Are recommendations specific and actionable?" - "Data-Backed": "Are recommendations supported by the competitive analysis data?" + "Actionable": "Recommendations are specific and actionable." + "Data-Backed": "Recommendations are supported by the competitive analysis data." diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template index 386d83da..c2256161 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template @@ -44,8 +44,8 @@ steps: # Optional: tell the reviewer what files to read for context # additional_review_guidance: "Read the [filename] for context on [what]." 
quality_criteria: - "[Criterion Name]": "[Question to evaluate]" - "[Another Criterion]": "[Another question]" + "[Criterion Name]": "[Statement of expected state after the step — NOT a question]" + "[Another Criterion]": "[Another statement of expected state]" # Optional: Delegate to a specific agent type (uses context: fork) # agent: general-purpose # or other agent type diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index b2c56c6a..1409f32d 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -625,16 +625,16 @@ class TestComputeTimeout: """Tests for QualityGate.compute_timeout.""" def test_base_timeout_for_few_files(self) -> None: - """Test that <=5 files gives base 120s timeout.""" - assert QualityGate.compute_timeout(0) == 120 - assert QualityGate.compute_timeout(1) == 120 - assert QualityGate.compute_timeout(5) == 120 + """Test that <=5 files gives base 240s (4 min) timeout.""" + assert QualityGate.compute_timeout(0) == 240 + assert QualityGate.compute_timeout(1) == 240 + assert QualityGate.compute_timeout(5) == 240 def test_timeout_increases_after_five(self) -> None: """Test that each file after 5 adds 30 seconds.""" - assert QualityGate.compute_timeout(6) == 150 - assert QualityGate.compute_timeout(10) == 270 # 120 + 5*30 - assert QualityGate.compute_timeout(20) == 570 # 120 + 15*30 + assert QualityGate.compute_timeout(6) == 270 + assert QualityGate.compute_timeout(10) == 390 # 240 + 5*30 + assert QualityGate.compute_timeout(20) == 690 # 240 + 15*30 class TestDynamicTimeout: @@ -653,8 +653,8 @@ async def test_timeout_passed_to_cli(self, mock_cli: ClaudeCLI, project_root: Pa ) call_kwargs = mock_cli.run.call_args.kwargs - # 1 file -> timeout = 120 - assert call_kwargs["timeout"] == 120 + # 1 file -> timeout = 240 + assert call_kwargs["timeout"] == 240 async def test_timeout_scales_with_file_count( self, mock_cli: ClaudeCLI, project_root: Path @@ -672,8 +672,8 @@ async def 
test_timeout_scales_with_file_count( ) call_kwargs = mock_cli.run.call_args.kwargs - # 10 files -> 120 + 5*30 = 270 - assert call_kwargs["timeout"] == 270 + # 10 files -> 240 + 5*30 = 390 + assert call_kwargs["timeout"] == 390 class TestMockQualityGate: