diff --git a/.claude/settings.json b/.claude/settings.json index 4d45bb0d..b7190a76 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -155,6 +155,17 @@ } ] } + ], + "SubagentStop": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "python -m deepwork.hooks.rules_check" + } + ] + } ] } } \ No newline at end of file diff --git a/.claude/skills/add_platform.add_capabilities/SKILL.md b/.claude/skills/add_platform.add_capabilities/SKILL.md index f00fd713..b9d76df3 100644 --- a/.claude/skills/add_platform.add_capabilities/SKILL.md +++ b/.claude/skills/add_platform.add_capabilities/SKILL.md @@ -18,6 +18,21 @@ hooks: If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the capability additions meet ALL criteria: + 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py + 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields + (set to None/null if the platform doesn't support that hook) + 3. Only hooks available on slash command definitions are added (not general CLI hooks) + 4. job_schema.py remains valid Python with no syntax errors + 5. adapters.py remains consistent - all adapters have the same hook fields + 6. If no new hooks are needed, document why in a comment + + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # add_platform.add_capabilities diff --git a/.claude/skills/add_platform.implement/SKILL.md b/.claude/skills/add_platform.implement/SKILL.md index eceb86ef..44722b65 100644 --- a/.claude/skills/add_platform.implement/SKILL.md +++ b/.claude/skills/add_platform.implement/SKILL.md @@ -22,6 +22,25 @@ hooks: If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: command + command: ".deepwork/jobs/add_platform/hooks/run_tests.sh" + - type: prompt + prompt: | + Verify the implementation meets ALL criteria: + 1. 
Platform adapter class is added to src/deepwork/adapters.py + 2. Templates exist in src/deepwork/templates// with appropriate command structure + 3. Tests exist for all new functionality + 4. Test coverage is 100% for new code (run: uv run pytest --cov) + 5. All tests pass + 6. README.md is updated with: + - New platform listed in supported platforms + - Installation instructions for the platform + - Any platform-specific notes + + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # add_platform.implement diff --git a/.claude/skills/add_platform.research/SKILL.md b/.claude/skills/add_platform.research/SKILL.md index ff7e489e..af44f2d3 100644 --- a/.claude/skills/add_platform.research/SKILL.md +++ b/.claude/skills/add_platform.research/SKILL.md @@ -19,6 +19,22 @@ hooks: If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the research output meets ALL criteria: + 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md + 2. Each file has a comment at the top with: + - Last updated date + - Source URL where the documentation was obtained + 3. cli_configuration.md covers how the platform's CLI is configured + 4. hooks_system.md covers hooks available for slash command definitions ONLY + 5. No extraneous documentation (only these two specific topics) + 6. Documentation is comprehensive enough to implement the platform + + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # add_platform.research diff --git a/.claude/skills/add_platform.verify/SKILL.md b/.claude/skills/add_platform.verify/SKILL.md index 67b20801..583101f2 100644 --- a/.claude/skills/add_platform.verify/SKILL.md +++ b/.claude/skills/add_platform.verify/SKILL.md @@ -17,6 +17,20 @@ hooks: If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the installation meets ALL criteria: + 1. 
Platform-specific directories/files are added to the deepwork repo as needed + 2. Running `deepwork install --platform ` completes without errors + 3. Expected command files are created in the platform's command directory + 4. Command file content matches the templates and job definitions + 5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly + 6. The platform can be used alongside existing platforms without conflicts + + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # add_platform.verify diff --git a/.claude/skills/commit.commit_and_push/SKILL.md b/.claude/skills/commit.commit_and_push/SKILL.md index a1aa3dad..7c96d0df 100644 --- a/.claude/skills/commit.commit_and_push/SKILL.md +++ b/.claude/skills/commit.commit_and_push/SKILL.md @@ -14,6 +14,17 @@ hooks: 4. Changes were pushed to remote If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the commit is ready: + 1. Changed files list was reviewed by the agent + 2. Files match what was modified during this session (or unexpected changes were investigated) + 3. Commit was created with appropriate message + 4. Changes were pushed to remote + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # commit.commit_and_push diff --git a/.claude/skills/commit.lint/SKILL.md b/.claude/skills/commit.lint/SKILL.md index 9e8ddf4d..1caa6bf0 100644 --- a/.claude/skills/commit.lint/SKILL.md +++ b/.claude/skills/commit.lint/SKILL.md @@ -13,6 +13,16 @@ hooks: 3. No remaining lint errors If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the linting is complete: + 1. ruff format was run successfully + 2. ruff check was run successfully (with --fix) + 3. No remaining lint errors + If ALL criteria are met, include `✓ Quality Criteria Met`. 
+ --- # commit.lint diff --git a/.claude/skills/commit.review/SKILL.md b/.claude/skills/commit.review/SKILL.md index 3c339def..08ed14f4 100644 --- a/.claude/skills/commit.review/SKILL.md +++ b/.claude/skills/commit.review/SKILL.md @@ -13,6 +13,16 @@ hooks: 3. All identified issues were addressed or documented as intentional If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the code review is complete: + 1. Changed files were identified + 2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage + 3. All identified issues were addressed or documented as intentional + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # commit.review diff --git a/.claude/skills/commit.test/SKILL.md b/.claude/skills/commit.test/SKILL.md index d4813e87..79229595 100644 --- a/.claude/skills/commit.test/SKILL.md +++ b/.claude/skills/commit.test/SKILL.md @@ -14,6 +14,17 @@ hooks: 4. Test output shows passing status If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the tests are passing: + 1. Latest code was pulled from the branch + 2. All tests completed successfully + 3. No test failures or errors remain + 4. Test output shows passing status + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # commit.test diff --git a/.claude/skills/deepwork_jobs.define/SKILL.md b/.claude/skills/deepwork_jobs.define/SKILL.md index 0215a6c7..c77c043a 100644 --- a/.claude/skills/deepwork_jobs.define/SKILL.md +++ b/.claude/skills/deepwork_jobs.define/SKILL.md @@ -34,6 +34,37 @@ hooks: If criteria are NOT met OR the promise tag is missing, respond with: {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. 
+ + ## Quality Criteria + + 1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions? + 2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input? + 3. **Document Detection**: For document-oriented workflows, did the agent detect patterns and offer doc spec creation? + 4. **doc spec Created (if applicable)**: If a doc spec was needed, was it created in `.deepwork/doc_specs/[doc_spec_name].md` with proper quality criteria? + 5. **doc spec References**: Are document outputs properly linked to their doc specs using `{file, doc_spec}` format? + 6. **Valid Against doc spec**: Does the job.yml conform to the job.yml doc spec quality criteria (valid identifier, semantic version, concise summary, rich description, complete steps, valid dependencies)? + 7. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? + 8. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? + 9. **Concise Summary**: Is the summary under 200 characters and descriptive? + 10. **Rich Description**: Does the description provide enough context for future refinement? + 11. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? + 12. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. 
+ + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.define diff --git a/.claude/skills/deepwork_jobs.implement/SKILL.md b/.claude/skills/deepwork_jobs.implement/SKILL.md index 28af7b70..9ad9bdcf 100644 --- a/.claude/skills/deepwork_jobs.implement/SKILL.md +++ b/.claude/skills/deepwork_jobs.implement/SKILL.md @@ -31,6 +31,34 @@ hooks: If criteria are NOT met OR the promise tag is missing, respond with: {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? + 2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? + 3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? + 4. **Output Examples**: Does each instruction file show what good output looks like? + 5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? + 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? + 7. **Sync Complete**: Has `deepwork sync` been run successfully? + 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? + 9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. 
+ + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. + + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.implement diff --git a/.claude/skills/deepwork_jobs.learn/SKILL.md b/.claude/skills/deepwork_jobs.learn/SKILL.md index da29dad5..cecc8e6f 100644 --- a/.claude/skills/deepwork_jobs.learn/SKILL.md +++ b/.claude/skills/deepwork_jobs.learn/SKILL.md @@ -33,6 +33,37 @@ hooks: If criteria are NOT met OR the promise tag is missing, respond with: {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? + 2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? + 3. **Instructions Improved**: Were job instructions updated to address identified issues? + 4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? + 5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? + 6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified? + 7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure? + 8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? + 9. **File References Used**: Do AGENTS.md entries reference other files where appropriate? + 10. 
**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? + 11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? + 12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. + + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.learn diff --git a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md index 85a881b1..25915621 100644 --- a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md +++ b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md @@ -26,6 +26,29 @@ hooks: If criteria are NOT met OR the promise tag is missing, respond with: {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review? + 2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria? + 3. **Findings Addressed**: Were all failed criteria addressed by the main agent? + 4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. 
+ + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} --- # deepwork_jobs.review_job_spec diff --git a/.claude/skills/manual_tests.run_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_fire_tests/SKILL.md index c9c4813c..65bb5a9b 100644 --- a/.claude/skills/manual_tests.run_fire_tests/SKILL.md +++ b/.claude/skills/manual_tests.run_fire_tests/SKILL.md @@ -14,7 +14,32 @@ hooks: 1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. 2. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? 3. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command. - 4. **Git Reverted Between Tests**: Was `git checkout -- manual_tests/` run between each test to prevent cross-contamination? + 4. **Git Reverted Between Tests**: Was `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` run between each test to prevent cross-contamination? + 5. **All Tests Run**: Were all 8 'should fire' tests executed (trigger/safety, set, pair, command action, multi safety, infinite block prompt, infinite block command, created)? + 6. **Results Recorded**: Did the main agent track pass/fail status for each test case? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. 
+ + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. + 2. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? + 3. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command. + 4. **Git Reverted Between Tests**: Was `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` run between each test to prevent cross-contamination? 5. **All Tests Run**: Were all 8 'should fire' tests executed (trigger/safety, set, pair, command action, multi safety, infinite block prompt, infinite block command, created)? 6. **Results Recorded**: Did the main agent track pass/fail status for each test case? @@ -84,9 +109,22 @@ For EACH test below, follow this cycle: 1. **Launch a sub-agent** using the Task tool (use a fast model like haiku) 2. **Wait for the sub-agent to complete** 3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **Record the result** - pass if hook fired, fail if it didn't -5. **Revert changes**: `git checkout -- manual_tests/` -6. **Proceed to the next test** +4. 
**If no visible blocking occurred, check the queue**: + ```bash + ls -la .deepwork/tmp/rules/queue/ + cat .deepwork/tmp/rules/queue/*.json 2>/dev/null + ``` + - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible + - If queue is empty, the hook did NOT fire at all + - Record the queue status along with the result +5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither +6. **Revert changes and clear queue**: + ```bash + git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true + ``` + The queue must be cleared because rules that have been shown (status=QUEUED) won't fire again until cleared. +7. **Proceed to the next test** **IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and revert before launching the next. @@ -128,22 +166,27 @@ For EACH test below, follow this cycle: Record the result after each test: -| Test Case | Should Fire | Hook Fired? | Result | -|-----------|-------------|:-----------:|:------:| -| Trigger/Safety | Edit .py only | | | -| Set Mode | Edit _source.py only | | | -| Pair Mode | Edit _trigger.py only | | | -| Command Action | Edit .txt | | | -| Multi Safety | Edit .py only | | | -| Infinite Block Prompt | Edit .py (no promise) | | | -| Infinite Block Command | Edit .py (no promise) | | | -| Created Mode | Create NEW .yml | | | +| Test Case | Should Fire | Visible Block? | Queue Entry? 
| Result | +|-----------|-------------|:--------------:|:------------:|:------:| +| Trigger/Safety | Edit .py only | | | | +| Set Mode | Edit _source.py only | | | | +| Pair Mode | Edit _trigger.py only | | | | +| Command Action | Edit .txt | | | | +| Multi Safety | Edit .py only | | | | +| Infinite Block Prompt | Edit .py (no promise) | | | | +| Infinite Block Command | Edit .py (no promise) | | | | +| Created Mode | Create NEW .yml | | | | + +**Queue Entry Status Guide:** +- If queue has entry with status "queued" → Hook fired, rule was shown to agent +- If queue has entry with status "passed" → Hook fired, rule was satisfied +- If queue is empty → Hook did NOT fire ## Quality Criteria - **Sub-agents spawned**: All 8 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly - **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Git reverted between tests**: `git checkout -- manual_tests/` was run after each test +- **Git reverted and queue cleared between tests**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` was run after each test - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY - **Blocking behavior verified**: For each test, the appropriate blocking hook fired automatically when the sub-agent returned - **Results recorded**: Pass/fail status was recorded for each test @@ -219,7 +262,7 @@ Stop hooks will automatically validate your work. The loop continues until all c 1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. 2. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? 3. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? 
The agent must NOT manually run the rules_check command. -4. **Git Reverted Between Tests**: Was `git checkout -- manual_tests/` run between each test to prevent cross-contamination? +4. **Git Reverted Between Tests**: Were `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` run between each test to prevent cross-contamination? 5. **All Tests Run**: Were all 8 'should fire' tests executed (trigger/safety, set, pair, command action, multi safety, infinite block prompt, infinite block command, created)? 6. **Results Recorded**: Did the main agent track pass/fail status for each test case? diff --git a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md index c7f0c74c..c3f2248c 100644 --- a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md +++ b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md @@ -15,7 +15,31 @@ hooks: 2. **Parallel Execution**: Were multiple sub-agents launched in parallel (in a single message with multiple Task tool calls)? 3. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. 4. **All Tests Run**: Were all 8 'should NOT fire' tests executed (trigger/safety, set, pair forward, pair reverse, multi safety, infinite block prompt, infinite block command, created)? - 5. **Git Reverted**: Were changes reverted after tests completed using `git checkout -- manual_tests/`? + 5. **Git Reverted**: Were changes reverted and queue cleared after tests completed using `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json`? + + ## Instructions + + Review the conversation and determine if ALL quality criteria above have been satisfied. + Look for evidence that each criterion has been addressed. 
+ + If the agent has included `✓ Quality Criteria Met` in their response AND + all criteria appear to be met, respond with: {"ok": true} + + If criteria are NOT met OR the promise tag is missing, respond with: + {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} + SubagentStop: + - hooks: + - type: prompt + prompt: | + You must evaluate whether Claude has met all the below quality criteria for the request. + + ## Quality Criteria + + 1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly. + 2. **Parallel Execution**: Were multiple sub-agents launched in parallel (in a single message with multiple Task tool calls)? + 3. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. + 4. **All Tests Run**: Were all 8 'should NOT fire' tests executed (trigger/safety, set, pair forward, pair reverse, multi safety, infinite block prompt, infinite block command, created)? + 5. **Git Reverted**: Were changes reverted and queue cleared after tests completed using `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json`? ## Instructions @@ -109,14 +133,15 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo | Infinite Block Command | Promise tag | | | Created Mode | Modify existing | | -4. **Revert all changes** +4. **Revert all changes and clear queue** After all tests complete, run: ```bash git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true ``` - This cleans up the test files before the "should fire" tests run. + This cleans up the test files AND clears the rules queue before the "should fire" tests run. 
The queue must be cleared because rules that have already been shown to the agent (status=QUEUED) won't fire again until the queue is cleared. ## Quality Criteria @@ -124,7 +149,7 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo - **Parallel execution**: All 8 sub-agents were launched in a single message (parallel) - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - **No unexpected blocks**: All tests passed - no blocking hooks fired -- **Changes reverted**: `git checkout -- manual_tests/` was run after tests completed +- **Changes reverted and queue cleared**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` were run after tests completed - When all criteria are met, include `✓ Quality Criteria Met` in your response ## Reference @@ -193,7 +218,7 @@ Stop hooks will automatically validate your work. The loop continues until all c 2. **Parallel Execution**: Were multiple sub-agents launched in parallel (in a single message with multiple Task tool calls)? 3. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. 4. **All Tests Run**: Were all 8 'should NOT fire' tests executed (trigger/safety, set, pair forward, pair reverse, multi safety, infinite block prompt, infinite block command, created)? -5. **Git Reverted**: Were changes reverted after tests completed using `git checkout -- manual_tests/`? +5. **Git Reverted**: Were changes reverted and queue cleared after tests completed using `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json`? **To complete**: Include `✓ Quality Criteria Met` in your final response only after verifying ALL criteria are satisfied. 
diff --git a/.claude/skills/update.job/SKILL.md b/.claude/skills/update.job/SKILL.md index 60038425..19ab7fb0 100644 --- a/.claude/skills/update.job/SKILL.md +++ b/.claude/skills/update.job/SKILL.md @@ -14,6 +14,17 @@ hooks: 4. Command files in .claude/commands/ were regenerated If ALL criteria are met, include `✓ Quality Criteria Met`. + SubagentStop: + - hooks: + - type: prompt + prompt: | + Verify the update process completed successfully: + 1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) + 2. `deepwork install --platform claude` was run + 3. Files in .deepwork/jobs/ match the source files + 4. Command files in .claude/commands/ were regenerated + If ALL criteria are met, include `✓ Quality Criteria Met`. + --- # update.job diff --git a/.deepwork/jobs/manual_tests/job.yml b/.deepwork/jobs/manual_tests/job.yml index d35fe02d..8e06f359 100644 --- a/.deepwork/jobs/manual_tests/job.yml +++ b/.deepwork/jobs/manual_tests/job.yml @@ -1,5 +1,5 @@ name: manual_tests -version: "1.0.0" +version: "1.1.0" summary: "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly." description: | A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. @@ -28,6 +28,8 @@ description: | - Created mode (new files only) changelog: + - version: "1.1.0" + changes: "Added rules queue clearing between tests to prevent anti-infinite-loop mechanism from blocking tests" - version: "1.0.0" changes: "Initial job creation - tests run in sub-agents to observe automatic hook firing" @@ -45,7 +47,7 @@ steps: - "**Parallel Execution**: Were multiple sub-agents launched in parallel (in a single message with multiple Task tool calls)?" - "**Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command." 
- "**All Tests Run**: Were all 8 'should NOT fire' tests executed (trigger/safety, set, pair forward, pair reverse, multi safety, infinite block prompt, infinite block command, created)?" - - "**Git Reverted**: Were changes reverted after tests completed using `git checkout -- manual_tests/`?" + - "**Git Reverted**: Were changes reverted and queue cleared after tests completed using `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json`?" - id: run_fire_tests name: "Run Should-Fire Tests" @@ -62,6 +64,6 @@ steps: - "**Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly." - "**Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?" - "**Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command." - - "**Git Reverted Between Tests**: Was `git checkout -- manual_tests/` run between each test to prevent cross-contamination?" + - "**Git Reverted Between Tests**: Was `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` run between each test to prevent cross-contamination?" - "**All Tests Run**: Were all 8 'should fire' tests executed (trigger/safety, set, pair, command action, multi safety, infinite block prompt, infinite block command, created)?" - "**Results Recorded**: Did the main agent track pass/fail status for each test case?" diff --git a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_fire_tests.md index b2a71998..f3e887fb 100644 --- a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md +++ b/.deepwork/jobs/manual_tests/steps/run_fire_tests.md @@ -37,9 +37,22 @@ For EACH test below, follow this cycle: 1. **Launch a sub-agent** using the Task tool (use a fast model like haiku) 2. 
**Wait for the sub-agent to complete** 3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **Record the result** - pass if hook fired, fail if it didn't -5. **Revert changes**: `git checkout -- manual_tests/` -6. **Proceed to the next test** +4. **If no visible blocking occurred, check the queue**: + ```bash + ls -la .deepwork/tmp/rules/queue/ + cat .deepwork/tmp/rules/queue/*.json 2>/dev/null + ``` + - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible + - If queue is empty, the hook did NOT fire at all + - Record the queue status along with the result +5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither +6. **Revert changes and clear queue**: + ```bash + git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true + ``` + The queue must be cleared because rules that have been shown (status=QUEUED) won't fire again until cleared. +7. **Proceed to the next test** **IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and revert before launching the next. @@ -81,22 +94,27 @@ For EACH test below, follow this cycle: Record the result after each test: -| Test Case | Should Fire | Hook Fired? | Result | -|-----------|-------------|:-----------:|:------:| -| Trigger/Safety | Edit .py only | | | -| Set Mode | Edit _source.py only | | | -| Pair Mode | Edit _trigger.py only | | | -| Command Action | Edit .txt | | | -| Multi Safety | Edit .py only | | | -| Infinite Block Prompt | Edit .py (no promise) | | | -| Infinite Block Command | Edit .py (no promise) | | | -| Created Mode | Create NEW .yml | | | +| Test Case | Should Fire | Visible Block? | Queue Entry? 
| Result | +|-----------|-------------|:--------------:|:------------:|:------:| +| Trigger/Safety | Edit .py only | | | | +| Set Mode | Edit _source.py only | | | | +| Pair Mode | Edit _trigger.py only | | | | +| Command Action | Edit .txt | | | | +| Multi Safety | Edit .py only | | | | +| Infinite Block Prompt | Edit .py (no promise) | | | | +| Infinite Block Command | Edit .py (no promise) | | | | +| Created Mode | Create NEW .yml | | | | + +**Queue Entry Status Guide:** +- If queue has entry with status "queued" → Hook fired, rule was shown to agent +- If queue has entry with status "passed" → Hook fired, rule was satisfied +- If queue is empty → Hook did NOT fire ## Quality Criteria - **Sub-agents spawned**: All 8 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly - **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Git reverted between tests**: `git checkout -- manual_tests/` was run after each test +- **Git reverted and queue cleared between tests**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` were run after each test - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY - **Blocking behavior verified**: For each test, the appropriate blocking hook fired automatically when the sub-agent returned - **Results recorded**: Pass/fail status was recorded for each test diff --git a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md index acca88e6..38cab325 100644 --- a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md +++ b/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md @@ -67,14 +67,15 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo | Infinite Block Command | Promise tag | | | Created Mode | Modify existing | | -4. 
**Revert all changes and clear queue** After all tests complete, run: ```bash git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true ``` - This cleans up the test files before the "should fire" tests run. + This cleans up the test files AND clears the rules queue before the "should fire" tests run. The queue must be cleared because rules that have already been shown to the agent (status=QUEUED) won't fire again until the queue is cleared. ## Quality Criteria @@ -82,7 +83,7 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo - **Parallel execution**: All 8 sub-agents were launched in a single message (parallel) - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - **No unexpected blocks**: All tests passed - no blocking hooks fired -- **Changes reverted**: `git checkout -- manual_tests/` was run after tests completed +- **Changes reverted and queue cleared**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` were run after tests completed - When all criteria are met, include `✓ Quality Criteria Met` in your response ## Reference diff --git a/.gemini/skills/manual_tests/run_fire_tests.toml b/.gemini/skills/manual_tests/run_fire_tests.toml index e60aefc3..dbd71b57 100644 --- a/.gemini/skills/manual_tests/run_fire_tests.toml +++ b/.gemini/skills/manual_tests/run_fire_tests.toml @@ -61,9 +61,22 @@ For EACH test below, follow this cycle: 1. **Launch a sub-agent** using the Task tool (use a fast model like haiku) 2. **Wait for the sub-agent to complete** 3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **Record the result** - pass if hook fired, fail if it didn't -5. **Revert changes**: `git checkout -- manual_tests/` -6. **Proceed to the next test** +4. 
**If no visible blocking occurred, check the queue**: + ```bash + ls -la .deepwork/tmp/rules/queue/ + cat .deepwork/tmp/rules/queue/*.json 2>/dev/null + ``` + - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible + - If queue is empty, the hook did NOT fire at all + - Record the queue status along with the result +5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither +6. **Revert changes and clear queue**: + ```bash + git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true + ``` + The queue must be cleared because rules that have been shown (status=QUEUED) won't fire again until cleared. +7. **Proceed to the next test** **IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and revert before launching the next. @@ -105,22 +118,27 @@ For EACH test below, follow this cycle: Record the result after each test: -| Test Case | Should Fire | Hook Fired? | Result | -|-----------|-------------|:-----------:|:------:| -| Trigger/Safety | Edit .py only | | | -| Set Mode | Edit _source.py only | | | -| Pair Mode | Edit _trigger.py only | | | -| Command Action | Edit .txt | | | -| Multi Safety | Edit .py only | | | -| Infinite Block Prompt | Edit .py (no promise) | | | -| Infinite Block Command | Edit .py (no promise) | | | -| Created Mode | Create NEW .yml | | | +| Test Case | Should Fire | Visible Block? | Queue Entry? 
| Result | +|-----------|-------------|:--------------:|:------------:|:------:| +| Trigger/Safety | Edit .py only | | | | +| Set Mode | Edit _source.py only | | | | +| Pair Mode | Edit _trigger.py only | | | | +| Command Action | Edit .txt | | | | +| Multi Safety | Edit .py only | | | | +| Infinite Block Prompt | Edit .py (no promise) | | | | +| Infinite Block Command | Edit .py (no promise) | | | | +| Created Mode | Create NEW .yml | | | | + +**Queue Entry Status Guide:** +- If queue has entry with status "queued" → Hook fired, rule was shown to agent +- If queue has entry with status "passed" → Hook fired, rule was satisfied +- If queue is empty → Hook did NOT fire ## Quality Criteria - **Sub-agents spawned**: All 8 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly - **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Git reverted between tests**: `git checkout -- manual_tests/` was run after each test +- **Git reverted and queue cleared between tests**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` were run after each test - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY - **Blocking behavior verified**: For each test, the appropriate blocking hook fired automatically when the sub-agent returned - **Results recorded**: Pass/fail status was recorded for each test @@ -189,7 +207,7 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` 1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. 2. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? 3. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? 
The agent must NOT manually run the rules_check command. -4. **Git Reverted Between Tests**: Was `git checkout -- manual_tests/` run between each test to prevent cross-contamination? +4. **Git Reverted Between Tests**: Were `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` run between each test to prevent cross-contamination? 5. **All Tests Run**: Were all 8 'should fire' tests executed (trigger/safety, set, pair, command action, multi safety, infinite block prompt, infinite block command, created)? 6. **Results Recorded**: Did the main agent track pass/fail status for each test case? ## On Completion diff --git a/.gemini/skills/manual_tests/run_not_fire_tests.toml b/.gemini/skills/manual_tests/run_not_fire_tests.toml index b80f0387..d8139215 100644 --- a/.gemini/skills/manual_tests/run_not_fire_tests.toml +++ b/.gemini/skills/manual_tests/run_not_fire_tests.toml @@ -87,14 +87,15 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo | Infinite Block Command | Promise tag | | | Created Mode | Modify existing | | -4. **Revert all changes** +4. **Revert all changes and clear queue** After all tests complete, run: ```bash git checkout -- manual_tests/ + rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null || true ``` - This cleans up the test files before the "should fire" tests run. + This cleans up the test files AND clears the rules queue before the "should fire" tests run. The queue must be cleared because rules that have already been shown to the agent (status=QUEUED) won't fire again until the queue is cleared. 
## Quality Criteria @@ -102,7 +103,7 @@ Run all 8 "should NOT fire" tests in **parallel** sub-agents, then verify no blo - **Parallel execution**: All 8 sub-agents were launched in a single message (parallel) - **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - **No unexpected blocks**: All tests passed - no blocking hooks fired -- **Changes reverted**: `git checkout -- manual_tests/` was run after tests completed +- **Changes reverted and queue cleared**: `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json` were run after tests completed - When all criteria are met, include `✓ Quality Criteria Met` in your response ## Reference @@ -164,7 +165,7 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` 2. **Parallel Execution**: Were multiple sub-agents launched in parallel (in a single message with multiple Task tool calls)? 3. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. 4. **All Tests Run**: Were all 8 'should NOT fire' tests executed (trigger/safety, set, pair forward, pair reverse, multi safety, infinite block prompt, infinite block command, created)? -5. **Git Reverted**: Were changes reverted after tests completed using `git checkout -- manual_tests/`? +5. **Git Reverted**: Were changes reverted and queue cleared after tests completed using `git checkout -- manual_tests/` and `rm -rf .deepwork/tmp/rules/queue/*.json`? ## On Completion 1. Verify outputs are created diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index 59faa92f..55028311 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -213,6 +213,12 @@ def _build_step_context( if hook_contexts: hooks[platform_event_name] = hook_contexts + # Claude Code has separate Stop and SubagentStop events. 
When a Stop hook + # is defined, also register it for SubagentStop so it triggers for both + # the main agent and subagents. + if "Stop" in hooks: + hooks["SubagentStop"] = hooks["Stop"] + # Backward compatibility: stop_hooks is after_agent hooks stop_hooks = hooks.get( adapter.get_platform_hook_name(SkillLifecycleHook.AFTER_AGENT) or "Stop", [] diff --git a/src/deepwork/core/hooks_syncer.py b/src/deepwork/core/hooks_syncer.py index 5df2e74f..4a97cbd7 100644 --- a/src/deepwork/core/hooks_syncer.py +++ b/src/deepwork/core/hooks_syncer.py @@ -187,6 +187,17 @@ def merge_hooks_for_platform( if not _hook_already_present(merged[event], command): merged[event].append(hook_config) + # Claude Code has separate Stop and SubagentStop events. When a Stop hook + # is defined, also register it for SubagentStop so it triggers for both + # the main agent and subagents. + if "Stop" in merged: + if "SubagentStop" not in merged: + merged["SubagentStop"] = [] + for hook_config in merged["Stop"]: + command = hook_config.get("hooks", [{}])[0].get("command", "") + if not _hook_already_present(merged["SubagentStop"], command): + merged["SubagentStop"].append(hook_config) + return merged diff --git a/src/deepwork/templates/claude/skill-job-step.md.jinja b/src/deepwork/templates/claude/skill-job-step.md.jinja index 8464a116..c76aa1ac 100644 --- a/src/deepwork/templates/claude/skill-job-step.md.jinja +++ b/src/deepwork/templates/claude/skill-job-step.md.jinja @@ -46,7 +46,8 @@ user-invocable: false {% if quality_criteria or hooks %} hooks: {% if quality_criteria %} - Stop: +{% for event_name in ["Stop", "SubagentStop"] %} + {{ event_name }}: - hooks: - type: prompt prompt: | @@ -68,9 +69,27 @@ hooks: If criteria are NOT met OR the promise tag is missing, respond with: {"ok": false, "reason": "**AGENT: TAKE ACTION** - [which criteria failed and why]"} +{% endfor %} {% endif %} {% for event_name, event_hooks in hooks.items() %} -{% if not (event_name == "Stop" and quality_criteria) %} +{% if 
not (event_name == "Stop" and quality_criteria) and not (event_name == "SubagentStop" and "Stop" in hooks) %} +{# For Stop events, generate both Stop and SubagentStop blocks #} +{% if event_name == "Stop" %} +{% for stop_event in ["Stop", "SubagentStop"] %} + {{ stop_event }}: + - hooks: +{% for hook in event_hooks %} +{% if hook.type == "script" %} + - type: command + command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" +{% else %} + - type: prompt + prompt: | + {{ hook.content | indent(12) }} +{% endif %} +{% endfor %} +{% endfor %} +{% else %} {{ event_name }}: - hooks: {% for hook in event_hooks %} @@ -84,6 +103,7 @@ hooks: {% endif %} {% endfor %} {% endif %} +{% endif %} {% endfor %} {% endif %} --- diff --git a/tests/shell_script_tests/test_rules_stop_hook.py b/tests/shell_script_tests/test_rules_stop_hook.py index 5eaa73f6..23418021 100644 --- a/tests/shell_script_tests/test_rules_stop_hook.py +++ b/tests/shell_script_tests/test_rules_stop_hook.py @@ -407,3 +407,75 @@ def test_promise_tag_still_prevents_firing( assert result == {}, f"Rule should not fire with promise tag: {result}" finally: os.unlink(transcript_path) + + +class TestSubagentStopEvent: + """Tests for SubagentStop event triggering agentFinished rules.""" + + def test_subagent_stop_event_triggers_rules( + self, src_dir: Path, git_repo_with_src_rule: Path + ) -> None: + """Test that SubagentStop event triggers agentFinished rules. + + Claude Code has both Stop and SubagentStop events that should both + trigger after_agent/agentFinished rules. 
+ """ + # Create a file that triggers the rule + test_src_dir = git_repo_with_src_rule / "src" + test_src_dir.mkdir(exist_ok=True) + (test_src_dir / "main.py").write_text("# New file\n") + + # Stage the change + repo = Repo(git_repo_with_src_rule) + repo.index.add(["src/main.py"]) + + # Run with SubagentStop event + hook_input = {"hook_event_name": "SubagentStop"} + stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, hook_input, src_dir=src_dir) + + # Parse the output + output = stdout.strip() + assert output, f"Expected JSON output. stderr: {stderr}" + result = json.loads(output) + + # Should trigger the rule just like Stop event does + assert result.get("decision") == "block", f"SubagentStop should trigger rules: {result}" + assert "Test Rule" in result.get("reason", "") + + def test_both_stop_and_subagent_stop_trigger_same_rules( + self, src_dir: Path, git_repo_with_src_rule: Path + ) -> None: + """Test that Stop and SubagentStop events trigger the same rules. + + Both events should fire agentFinished rules with identical behavior. 
+ """ + # Create a file that triggers the rule + test_src_dir = git_repo_with_src_rule / "src" + test_src_dir.mkdir(exist_ok=True) + (test_src_dir / "main.py").write_text("# New file\n") + + repo = Repo(git_repo_with_src_rule) + repo.index.add(["src/main.py"]) + + # Test Stop event + hook_input_stop = {"hook_event_name": "Stop"} + stdout_stop, _, _ = run_stop_hook(git_repo_with_src_rule, hook_input_stop, src_dir=src_dir) + result_stop = json.loads(stdout_stop.strip()) + + # Clear the queue to allow the rule to fire again + queue_dir = git_repo_with_src_rule / ".deepwork" / "tmp" / "rules" / "queue" + if queue_dir.exists(): + for f in queue_dir.glob("*.json"): + f.unlink() + + # Test SubagentStop event + hook_input_subagent = {"hook_event_name": "SubagentStop"} + stdout_subagent, _, _ = run_stop_hook( + git_repo_with_src_rule, hook_input_subagent, src_dir=src_dir + ) + result_subagent = json.loads(stdout_subagent.strip()) + + # Both should produce the same blocking behavior + assert result_stop.get("decision") == result_subagent.get("decision") == "block" + assert "Test Rule" in result_stop.get("reason", "") + assert "Test Rule" in result_subagent.get("reason", "") diff --git a/tests/unit/test_hooks_syncer.py b/tests/unit/test_hooks_syncer.py index abaca222..79527681 100644 --- a/tests/unit/test_hooks_syncer.py +++ b/tests/unit/test_hooks_syncer.py @@ -224,6 +224,57 @@ def test_avoids_duplicate_hooks(self, temp_dir: Path) -> None: # Should only have one entry assert len(result["Stop"]) == 1 + def test_duplicates_stop_hooks_to_subagent_stop(self, temp_dir: Path) -> None: + """Test that Stop hooks are also registered for SubagentStop event. + + Claude Code has separate Stop and SubagentStop events. When a Stop hook + is defined, it should also be registered for SubagentStop so the hook + triggers for both the main agent and subagents. 
+ """ + job_dir = temp_dir / ".deepwork" / "jobs" / "job1" + job_dir.mkdir(parents=True) + + job_hooks_list = [ + JobHooks( + job_name="job1", + job_dir=job_dir, + hooks={"Stop": [HookSpec(script="hook.sh")]}, + ), + ] + + result = merge_hooks_for_platform(job_hooks_list, temp_dir) + + # Should have both Stop and SubagentStop events + assert "Stop" in result + assert "SubagentStop" in result + assert len(result["Stop"]) == 1 + assert len(result["SubagentStop"]) == 1 + + # Both should have the same hook command + stop_cmd = result["Stop"][0]["hooks"][0]["command"] + subagent_stop_cmd = result["SubagentStop"][0]["hooks"][0]["command"] + assert stop_cmd == subagent_stop_cmd == ".deepwork/jobs/job1/hooks/hook.sh" + + def test_does_not_duplicate_subagent_stop_if_no_stop(self, temp_dir: Path) -> None: + """Test that SubagentStop is not created if there are no Stop hooks.""" + job_dir = temp_dir / ".deepwork" / "jobs" / "job1" + job_dir.mkdir(parents=True) + + job_hooks_list = [ + JobHooks( + job_name="job1", + job_dir=job_dir, + hooks={"UserPromptSubmit": [HookSpec(script="capture.sh")]}, + ), + ] + + result = merge_hooks_for_platform(job_hooks_list, temp_dir) + + # Should only have UserPromptSubmit, not SubagentStop + assert "UserPromptSubmit" in result + assert "SubagentStop" not in result + assert "Stop" not in result + class TestSyncHooksToPlatform: """Tests for sync_hooks_to_platform function using adapters.""" @@ -249,7 +300,8 @@ def test_syncs_hooks_via_adapter(self, temp_dir: Path) -> None: count = sync_hooks_to_platform(temp_dir, adapter, job_hooks_list) - assert count == 1 + # Count is 2 because Stop hooks are also registered for SubagentStop + assert count == 2 # Verify settings.json was created settings_file = temp_dir / ".claude" / "settings.json" @@ -260,6 +312,7 @@ def test_syncs_hooks_via_adapter(self, temp_dir: Path) -> None: assert "hooks" in settings assert "Stop" in settings["hooks"] + assert "SubagentStop" in settings["hooks"] def 
test_returns_zero_for_empty_hooks(self, temp_dir: Path) -> None: """Test returns 0 when no hooks to sync.""" diff --git a/tests/unit/test_stop_hooks.py b/tests/unit/test_stop_hooks.py index c1516514..f2837d90 100644 --- a/tests/unit/test_stop_hooks.py +++ b/tests/unit/test_stop_hooks.py @@ -618,3 +618,183 @@ def test_build_context_multiple_hooks(self, generator: SkillGenerator, tmp_path: assert context["stop_hooks"][0]["type"] == "prompt" assert context["stop_hooks"][1]["type"] == "script" assert context["stop_hooks"][2]["type"] == "prompt" + + def test_build_context_duplicates_stop_to_subagent_stop( + self, generator: SkillGenerator, job_with_hooks: JobDefinition + ) -> None: + """Test that Stop hooks are also registered for SubagentStop event. + + Claude Code has separate Stop and SubagentStop events. When a Stop hook + is defined, it should also be registered for SubagentStop so the hook + triggers for both the main agent and subagents. + """ + adapter = ClaudeAdapter() + context = generator._build_step_context(job_with_hooks, job_with_hooks.steps[0], 0, adapter) + + # Should have both Stop and SubagentStop in hooks dict + assert "hooks" in context + assert "Stop" in context["hooks"] + assert "SubagentStop" in context["hooks"] + + # Both should have the same hooks + assert context["hooks"]["Stop"] == context["hooks"]["SubagentStop"] + assert len(context["hooks"]["Stop"]) == 1 + assert context["hooks"]["Stop"][0]["type"] == "prompt" + + def test_build_context_no_subagent_stop_without_stop( + self, generator: SkillGenerator, tmp_path: Path + ) -> None: + """Test that SubagentStop is not created if there are no Stop hooks.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1") + + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test", + description="Test", + steps=[ + Step( + id="step1", + name="Step 1", + description="Step", + 
instructions_file="steps/step1.md", + outputs=[OutputSpec(file="out.md")], + ) + ], + job_dir=job_dir, + ) + + adapter = ClaudeAdapter() + context = generator._build_step_context(job, job.steps[0], 0, adapter) + + # Should not have Stop or SubagentStop without any hooks + assert "hooks" in context + assert "Stop" not in context["hooks"] + assert "SubagentStop" not in context["hooks"] + + +class TestGeneratorTemplateOutput: + """Tests for generated skill file output.""" + + @pytest.fixture + def full_generator(self) -> SkillGenerator: + """Create generator using actual package templates.""" + # Use the actual templates directory from the package + templates_dir = Path(__file__).parent.parent.parent / "src" / "deepwork" / "templates" + return SkillGenerator(templates_dir) + + @pytest.fixture + def job_with_quality_criteria(self, tmp_path: Path) -> JobDefinition: + """Create job with quality_criteria for testing template output.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1 Instructions\n\nDo the thing.") + + return JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="A test job", + steps=[ + Step( + id="step1", + name="Step 1", + description="First step", + instructions_file="steps/step1.md", + outputs=[OutputSpec(file="output.md")], + quality_criteria=["Criterion 1 is met", "Criterion 2 is verified"], + ), + ], + job_dir=job_dir, + ) + + @pytest.fixture + def job_with_stop_hooks(self, tmp_path: Path) -> JobDefinition: + """Create job with custom stop hooks for testing template output.""" + job_dir = tmp_path / "test_job" + job_dir.mkdir() + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1 Instructions") + + return JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="A test job", + steps=[ + Step( + id="step1", + name="Step 1", + 
description="First step", + instructions_file="steps/step1.md", + outputs=[OutputSpec(file="output.md")], + hooks={ + "after_agent": [HookAction(prompt="Custom validation prompt")], + }, + ), + ], + job_dir=job_dir, + ) + + def test_template_generates_both_stop_and_subagent_stop_for_quality_criteria( + self, + full_generator: SkillGenerator, + job_with_quality_criteria: JobDefinition, + tmp_path: Path, + ) -> None: + """Test that template generates both Stop and SubagentStop hooks for quality_criteria.""" + adapter = ClaudeAdapter() + skill_path = full_generator.generate_step_skill( + job_with_quality_criteria, + job_with_quality_criteria.steps[0], + adapter, + tmp_path, + ) + + content = skill_path.read_text() + + # Both Stop and SubagentStop should be in the generated file + assert "Stop:" in content, "Stop hook should be in generated skill" + assert "SubagentStop:" in content, "SubagentStop hook should be in generated skill" + + # Both should contain the quality criteria prompt + lines = content.split("\n") + stop_found = False + subagent_stop_found = False + for _i, line in enumerate(lines): + if line.strip().startswith("Stop:"): + stop_found = True + if line.strip().startswith("SubagentStop:"): + subagent_stop_found = True + + assert stop_found and subagent_stop_found, ( + f"Both Stop and SubagentStop should be generated. 
Content:\n{content[:1000]}" + ) + + def test_template_generates_both_stop_and_subagent_stop_for_custom_hooks( + self, full_generator: SkillGenerator, job_with_stop_hooks: JobDefinition, tmp_path: Path + ) -> None: + """Test that template generates both Stop and SubagentStop for custom stop hooks.""" + adapter = ClaudeAdapter() + skill_path = full_generator.generate_step_skill( + job_with_stop_hooks, + job_with_stop_hooks.steps[0], + adapter, + tmp_path, + ) + + content = skill_path.read_text() + + # Both Stop and SubagentStop should be in the generated file + assert "Stop:" in content, "Stop hook should be in generated skill" + assert "SubagentStop:" in content, "SubagentStop hook should be in generated skill" + + # Both should contain the custom prompt + assert "Custom validation prompt" in content, "Custom prompt should be in generated skill"