diff --git a/.deepwork/rules/manual-test-infinite-block-command.md b/.deepwork/rules/manual-test-infinite-block-command.md new file mode 100644 index 00000000..8f8b24b4 --- /dev/null +++ b/.deepwork/rules/manual-test-infinite-block-command.md @@ -0,0 +1,41 @@ +--- +name: "Manual Test: Infinite Block Command" +trigger: manual_tests/test_infinite_block_command/test_infinite_block_command.py +action: + command: "false" + run_for: each_match +compare_to: prompt +--- + +# Manual Test: Infinite Block Command (Promise Required) + +This rule runs a command that ALWAYS FAILS (`false` returns exit code 1). + +## Why this blocks + +The command action always fails, creating an infinite block. The only way +to proceed should be to provide a promise acknowledging that you understand +the restriction. + +## Expected behavior + +If promises work correctly for command actions: +- Without promise: Command runs, fails, blocks +- With promise: Command is SKIPPED entirely, allows + +If there's a bug: +- The command will run and fail even when a promise is provided + +## What to do + +You MUST include the following promise tag in your response: + +``` +<promise>Manual Test: Infinite Block Command</promise> +``` + +## This tests + +Whether the promise mechanism works for COMMAND-type rules. If a rule is +promised, the command should not run at all - the rule should be skipped +during evaluation. diff --git a/.deepwork/rules/manual-test-infinite-block-prompt.md b/.deepwork/rules/manual-test-infinite-block-prompt.md new file mode 100644 index 00000000..67c97414 --- /dev/null +++ b/.deepwork/rules/manual-test-infinite-block-prompt.md @@ -0,0 +1,34 @@ +--- +name: "Manual Test: Infinite Block Prompt" +trigger: manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py +compare_to: prompt +--- + +# Manual Test: Infinite Block Prompt (Promise Required) + +You edited `{trigger_files}` which triggers an infinite block. + +## Why this blocks + +This rule has NO safety file option and uses a PROMPT action. 
The only way +to proceed is to provide a promise acknowledging that you understand the +restriction. + +## What to do + +You MUST include the following promise tag in your response: + +``` +<promise>Manual Test: Infinite Block Prompt</promise> +``` + +This simulates scenarios where: +- An operation requires explicit acknowledgment before proceeding +- There is no alternative action that can suppress the rule +- The agent must demonstrate understanding of the constraint + +## This tests + +The promise mechanism for PROMPT-type rules that cannot be satisfied by +editing additional files. This is useful for enforcing policies where +acknowledgment is the only valid response. diff --git a/CHANGELOG.md b/CHANGELOG.md index 41243448..26a9a3ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to DeepWork will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.1] - 2026-01-18 + +### Fixed +- Command rule errors now include promise skip instructions with the exact rule name + - Previously, failed command rules only showed "Command failed" with no guidance + - Now each failed rule shows: `To skip, include <promise>Rule Name</promise> in your response` + - This allows agents to understand how to proceed when a command rule fails + ## [0.4.0] - 2026-01-16 ### Added @@ -84,6 +92,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial version. 
+[0.4.1]: https://github.com/anthropics/deepwork/releases/tag/0.4.1 [0.4.0]: https://github.com/anthropics/deepwork/releases/tag/0.4.0 [0.3.0]: https://github.com/anthropics/deepwork/releases/tag/0.3.0 [0.1.1]: https://github.com/anthropics/deepwork/releases/tag/0.1.1 diff --git a/README.md b/README.md index 96816677..c4c0176f 100644 --- a/README.md +++ b/README.md @@ -304,3 +304,4 @@ For commercial use or questions about licensing, please contact legal@unsupervis ## Credits - Inspired by [GitHub's spec-kit](https://github.com/github/spec-kit) + diff --git a/doc/architecture.md b/doc/architecture.md index 29400973..a79377fd 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -1269,3 +1269,4 @@ Claude: Created rule "API documentation update" in .deepwork/rules/api-documenta - [Git Workflows](https://www.atlassian.com/git/tutorials/comparing-workflows) - [JSON Schema](https://json-schema.org/) - [Jinja2 Documentation](https://jinja.palletsprojects.com/) + diff --git a/manual_tests/README.md b/manual_tests/README.md index 7baaddb4..11906456 100644 --- a/manual_tests/README.md +++ b/manual_tests/README.md @@ -34,6 +34,8 @@ Each test has two cases: one where the rule SHOULD fire, and one where it should | **Pair Mode (reverse)** | — | Edit `_expected.md` only (should NOT fire) | Manual Test: Pair Mode | | **Command Action** | Edit `.txt` → log appended | — (always runs) | Manual Test: Command Action | | **Multi Safety** | Edit `.py` only | Edit `.py` AND any safety file | Manual Test: Multi Safety | +| **Infinite Block Prompt** | Edit `.py` (always blocks) | Provide `<promise>` tag | Manual Test: Infinite Block Prompt | +| **Infinite Block Command** | Edit `.py` (command fails) | Provide `<promise>` tag | Manual Test: Infinite Block Command | ## Test Results Tracking @@ -45,6 +47,8 @@ Each test has two cases: one where the rule SHOULD fire, and one where it should | Pair Mode (reverse - expected only) | — | ☐ | | Command Action | ☐ | — | | Multi Safety | ☐ | ☐ | +| Infinite 
Block Prompt | ☐ | ☐ | +| Infinite Block Command | ☐ | ☐ | ## Test Folders @@ -55,6 +59,8 @@ Each test has two cases: one where the rule SHOULD fire, and one where it should | `test_pair_mode/` | Pair (Directional) | One-way: trigger requires expected, but not vice versa | | `test_command_action/` | Command Action | Automatically runs command on file change | | `test_multi_safety/` | Multiple Safety | Fires unless ANY of the safety files also edited | +| `test_infinite_block_prompt/` | Infinite Block (Prompt) | Always blocks with prompt; only promise can bypass | +| `test_infinite_block_command/` | Infinite Block (Command) | Command always fails; tests if promise skips command | ## Corresponding Rules @@ -64,3 +70,5 @@ Rules are defined in `.deepwork/rules/`: - `manual-test-pair-mode.md` - `manual-test-command-action.md` - `manual-test-multi-safety.md` +- `manual-test-infinite-block-prompt.md` +- `manual-test-infinite-block-command.md` diff --git a/manual_tests/test_infinite_block_command/test_infinite_block_command.py b/manual_tests/test_infinite_block_command/test_infinite_block_command.py new file mode 100644 index 00000000..22be16c7 --- /dev/null +++ b/manual_tests/test_infinite_block_command/test_infinite_block_command.py @@ -0,0 +1,42 @@ +""" +MANUAL TEST: Infinite Block Command Rule + +=== WHAT THIS TESTS === +Tests a COMMAND-type rule with a command that ALWAYS FAILS - it will ALWAYS +block when the trigger file is edited. + +This verifies: +1. The rule correctly blocks when the file is edited (command fails) +2. The error output includes guidance on how to skip using a promise +3. Without guidance in the output, the agent cannot know how to proceed + +=== TEST CASE 1: Rule SHOULD fire (command fails, infinite block) === +1. Edit this file (add a comment below the marker) +2. Run: echo '{}' | python -m deepwork.hooks.rules_check +3. 
Expected: Block with command error AND promise skip instructions + +=== TEST CASE 2: Rule should NOT fire (promise provided) === +1. Edit this file (add a comment below the marker) +2. Provide a promise (format shown in command error output) +3. Expected: Empty JSON {} (allow) - promise bypasses the command entirely + +=== RULE LOCATION === +.deepwork/rules/manual-test-infinite-block-command.md + +=== KEY DIFFERENCE FROM PROMPT VERSION === +- Prompt version: Shows instructions in the rule's markdown body +- Command version: Must show instructions alongside command error output + +If the command error output does NOT include promise skip instructions, +this is a bug - the agent has no way to know how to proceed. +""" + + +def restricted_command_operation(): + """An operation that requires explicit acknowledgment to proceed.""" + return "This operation uses a command that always fails" + + +# Edit below this line to trigger the rule +# ------------------------------------------- +# Test edit for command block diff --git a/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py b/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py new file mode 100644 index 00000000..5c2ee508 --- /dev/null +++ b/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py @@ -0,0 +1,57 @@ +""" +MANUAL TEST: Infinite Block Prompt Rule (Promise Required) + +=== WHAT THIS TESTS === +Tests a PROMPT-type rule with NO safety file option - it will ALWAYS block +when the trigger file is edited. The only way to proceed is to provide a +promise in the correct format. + +This verifies: +1. The rule correctly blocks when the file is edited +2. The promise mechanism works to bypass the block +3. The promise must be in the exact format: <promise>Rule Name</promise> + +=== TEST CASE 1: Rule SHOULD fire (infinite block) === +1. Edit this file (add a comment below the marker) +2. Run: echo '{}' | python -m deepwork.hooks.rules_check +3. 
Expected: "Manual Test: Infinite Block Prompt" appears in output with decision="block" +4. The block message should explain that a promise is required + +=== TEST CASE 2: Rule should NOT fire (promise provided) === +1. Edit this file (add a comment below the marker) +2. Create a transcript with: <promise>Manual Test: Infinite Block Prompt</promise> +3. Run the hook with the transcript +4. Expected: Empty JSON {} (allow) - promise bypasses the block + +=== HOW TO TEST WITH PROMISE === +The promise must be in the conversation transcript. To test: + +1. Create a temp transcript file with the promise: + echo '{"role":"assistant","message":{"content":[{"type":"text","text":"<promise>Manual Test: Infinite Block Prompt</promise>"}]}}' > /tmp/transcript.jsonl + +2. Run with transcript: + echo '{"transcript_path":"/tmp/transcript.jsonl"}' | python -m deepwork.hooks.rules_check + +3. Expected: {} (empty JSON = allow) + +=== RULE LOCATION === +.deepwork/rules/manual-test-infinite-block-prompt.md + +=== KEY DIFFERENCE FROM OTHER TESTS === +Other tests have a "safety" file that can be edited to suppress the rule. +This test has NO safety option - the ONLY way to proceed is with a promise. +This simulates scenarios where the agent must explicitly acknowledge a +constraint before proceeding. + +=== COMPARISON WITH COMMAND VERSION === +See test_infinite_block_command/ for the command-action version of this test. 
+""" + + +def restricted_operation(): + """An operation that requires explicit acknowledgment to proceed.""" + return "This operation always requires a promise to proceed" + + +# Edit below this line to trigger the rule +# ------------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index d84e3edb..5aefbca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.4.0" +version = "0.4.1" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/hooks/rules_check.py b/src/deepwork/hooks/rules_check.py index 1d43d12e..2e6694c7 100644 --- a/src/deepwork/hooks/rules_check.py +++ b/src/deepwork/hooks/rules_check.py @@ -244,10 +244,10 @@ def extract_promise_tags(text: str) -> set[str]: Supports both: - <promise>Rule Name</promise> - - <promise>Rule Name</promise> + - <promise>✓ Rule Name</promise> """ - # Match with or without checkmark - pattern = r"<promise>(?:\s*)?([^<]+)</promise>" + # Match with optional checkmark prefix (✓ or ✓ with space) + pattern = r"<promise>(?:\s*)?(?:✓\s*)?([^<]+)</promise>" matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL) return {m.strip() for m in matches} @@ -460,7 +460,8 @@ def rules_check_hook(hook_input: HookInput) -> HookOutput: else: # Command failed error_msg = format_command_errors(cmd_results) - command_errors.append(f"## {rule.name}\n{error_msg}") + skip_hint = f"To skip, include `<promise>✓ {rule.name}</promise>` in your response.\n" + command_errors.append(f"## {rule.name}\n{error_msg}{skip_hint}") queue.update_status( trigger_hash, QueueEntryStatus.FAILED, @@ -481,6 +482,7 @@ def rules_check_hook(hook_input: HookInput) -> HookOutput: # Add command errors if any if command_errors: messages.append("## Command Rule Errors\n") + messages.append("The following command rules failed.\n") messages.extend(command_errors) messages.append("") diff --git a/tests/unit/test_rules_check.py b/tests/unit/test_rules_check.py new file mode 100644 index 
00000000..e672fd94 --- /dev/null +++ b/tests/unit/test_rules_check.py @@ -0,0 +1,105 @@ +"""Tests for rules_check hook module.""" + +from deepwork.hooks.rules_check import extract_promise_tags + + +class TestExtractPromiseTags: + """Tests for extract_promise_tags function.""" + + def test_extracts_simple_promise(self) -> None: + """Test extracting a simple promise tag.""" + text = "I've reviewed this. <promise>Rule Name</promise>" + result = extract_promise_tags(text) + assert result == {"Rule Name"} + + def test_extracts_promise_with_checkmark(self) -> None: + """Test extracting promise tag with checkmark prefix.""" + text = "Done. <promise>✓ Rule Name</promise>" + result = extract_promise_tags(text) + assert result == {"Rule Name"} + + def test_extracts_promise_with_checkmark_no_space(self) -> None: + """Test extracting promise tag with checkmark but no space.""" + text = "<promise>✓Rule Name</promise>" + result = extract_promise_tags(text) + assert result == {"Rule Name"} + + def test_extracts_multiple_promises(self) -> None: + """Test extracting multiple promise tags.""" + text = """ + <promise>Rule One</promise> + <promise>✓ Rule Two</promise> + <promise>Rule Three</promise> + """ + result = extract_promise_tags(text) + assert result == {"Rule One", "Rule Two", "Rule Three"} + + def test_case_insensitive_tag(self) -> None: + """Test that promise tags are case-insensitive.""" + text = "<PROMISE>Rule Name</PROMISE>" + result = extract_promise_tags(text) + assert result == {"Rule Name"} + + def test_preserves_rule_name_case(self) -> None: + """Test that rule name case is preserved.""" + text = "<promise>Architecture Documentation Accuracy</promise>" + result = extract_promise_tags(text) + assert result == {"Architecture Documentation Accuracy"} + + def test_handles_whitespace_in_tag(self) -> None: + """Test handling of whitespace around rule name.""" + text = "<promise> Rule Name </promise>" + result = extract_promise_tags(text) + assert result == {"Rule Name"} + + def test_handles_newlines_in_tag(self) -> None: + """Test handling of newlines in promise tag.""" + text = "<promise>\n Rule Name\n</promise>" + result = extract_promise_tags(text) + 
assert result == {"Rule Name"} + + def test_returns_empty_set_for_no_promises(self) -> None: + """Test that empty set is returned when no promises exist.""" + text = "No promises here." + result = extract_promise_tags(text) + assert result == set() + + def test_handles_empty_string(self) -> None: + """Test handling of empty string.""" + result = extract_promise_tags("") + assert result == set() + + def test_real_world_command_error_promise(self) -> None: + """Test promise format shown in command error output.""" + # This is the exact format shown to agents when a command rule fails + text = "<promise>✓ Manual Test: Infinite Block Command</promise>" + result = extract_promise_tags(text) + assert result == {"Manual Test: Infinite Block Command"} + + def test_mixed_formats_in_same_text(self) -> None: + """Test extracting both checkmark and non-checkmark promises.""" + text = """ + <promise>Rule Without Checkmark</promise> + <promise>✓ Rule With Checkmark</promise> + """ + result = extract_promise_tags(text) + assert result == {"Rule Without Checkmark", "Rule With Checkmark"} + + def test_promise_with_special_characters_in_name(self) -> None: + """Test promise with special characters in rule name.""" + text = "<promise>Source/Test Pairing</promise>" + result = extract_promise_tags(text) + assert result == {"Source/Test Pairing"} + + def test_promise_embedded_in_markdown(self) -> None: + """Test promise tag embedded in markdown text.""" + text = """ + I've reviewed the documentation and it's accurate. + + <promise>Architecture Documentation Accuracy</promise> + <promise>README Accuracy</promise> + + The changes were purely cosmetic. + """ + result = extract_promise_tags(text) + assert result == {"Architecture Documentation Accuracy", "README Accuracy"}