diff --git a/src/deepwork/core/policy_parser.py b/src/deepwork/core/policy_parser.py index dca1d235..b6ade990 100644 --- a/src/deepwork/core/policy_parser.py +++ b/src/deepwork/core/policy_parser.py @@ -17,6 +17,11 @@ class PolicyParseError(Exception): pass +# Valid compare_to values +COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"}) +DEFAULT_COMPARE_TO = "base" + + @dataclass class Policy: """Represents a single policy definition.""" @@ -25,6 +30,7 @@ class Policy: triggers: list[str] # Normalized to list safety: list[str] = field(default_factory=list) # Normalized to list, empty if not specified instructions: str = "" # Resolved content (either inline or from file) + compare_to: str = DEFAULT_COMPARE_TO # What to compare against: base, default_tip, or prompt @classmethod def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "Policy": @@ -74,11 +80,15 @@ def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "Polic f"Policy '{data['name']}' must have either 'instructions' or 'instructions_file'" ) + # Get compare_to (defaults to DEFAULT_COMPARE_TO) + compare_to = data.get("compare_to", DEFAULT_COMPARE_TO) + return cls( name=data["name"], triggers=triggers, safety=safety, instructions=instructions, + compare_to=compare_to, ) diff --git a/src/deepwork/hooks/evaluate_policies.py b/src/deepwork/hooks/evaluate_policies.py index 8713d7a4..07ac3845 100644 --- a/src/deepwork/hooks/evaluate_policies.py +++ b/src/deepwork/hooks/evaluate_policies.py @@ -6,12 +6,16 @@ Usage: python -m deepwork.hooks.evaluate_policies \ - --policy-file .deepwork.policy.yml \ - --changed-files "file1.py\nfile2.py" + --policy-file .deepwork.policy.yml The conversation context is read from stdin and checked for tags that indicate policies have already been addressed. 
# Prefix git prints for remote-tracking refs of the "origin" remote.
_ORIGIN_REF_PREFIX = "refs/remotes/origin/"

# Errors that mean "git could not give us an answer": a non-zero exit status
# (CalledProcessError) or git not being runnable at all (OSError, which covers
# FileNotFoundError when the binary is missing).
_GIT_ERRORS = (subprocess.CalledProcessError, OSError)


def _run_git(*args: str, check: bool = True) -> str:
    """Run ``git <args>`` in the current directory and return its stdout."""
    completed = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=check,
    )
    return completed.stdout


def _split_paths(output: str) -> set[str]:
    """Turn newline-separated text into a set of non-empty path strings."""
    return {line for line in output.strip().split("\n") if line}


def _stage_all_changes() -> None:
    """Stage every working-tree change so it shows up in ``git diff --cached``."""
    # Best effort: a failure here (e.g. a locked index) must not abort evaluation.
    # NOTE: this mutates the user's index; it mirrors what the removed
    # get_changed_files.sh hook did.
    _run_git("add", "-A", check=False)


def _uncommitted_files() -> set[str]:
    """Return staged-but-uncommitted files plus untracked, non-ignored files."""
    staged = _split_paths(_run_git("diff", "--name-only", "--cached", check=False))
    # Normally empty right after `git add -A`; still useful when staging failed.
    untracked = _split_paths(
        _run_git("ls-files", "--others", "--exclude-standard", check=False)
    )
    return staged | untracked


def _changes_against(*revisions: str) -> list[str]:
    """Return the sorted union of ``git diff <revisions>`` and uncommitted files."""
    _stage_all_changes()
    committed = _split_paths(_run_git("diff", "--name-only", *revisions))
    return sorted(committed | _uncommitted_files())


def get_default_branch() -> str:
    """
    Get the default branch name (e.g. main or master).

    The name is read from ``refs/remotes/origin/HEAD`` when that symbolic ref
    exists. Otherwise ``origin/main`` and ``origin/master`` are probed in turn.

    Returns:
        Default branch name, or "main" if it cannot be determined (including
        when git is not installed or the directory is not a repository).
    """
    try:
        # Output is like "refs/remotes/origin/main"
        ref = _run_git("symbolic-ref", "refs/remotes/origin/HEAD").strip()
    except _GIT_ERRORS:
        ref = ""

    if ref:
        if ref.startswith(_ORIGIN_REF_PREFIX):
            # Strip only the remote prefix so names such as "release/v1" survive
            # intact (taking the last path component would yield "v1").
            branch = ref.removeprefix(_ORIGIN_REF_PREFIX)
        else:
            # Unexpected layout: keep the historical "last component" behaviour.
            branch = ref.rsplit("/", 1)[-1]
        if branch:
            return branch

    # Try common default branch names
    for candidate in ("main", "master"):
        try:
            _run_git("rev-parse", "--verify", f"origin/{candidate}")
        except _GIT_ERRORS:
            continue
        return candidate

    # Fall back to main
    return "main"


def get_changed_files_base() -> list[str]:
    """
    Get files changed relative to the base of the current branch.

    This finds the merge-base between HEAD and ``origin/<default branch>``, then
    returns every file that differs between that commit and HEAD, plus staged
    and untracked files.

    Returns:
        Sorted list of changed file paths; empty if git fails or is unavailable.
    """
    default_branch = get_default_branch()

    try:
        # The merge-base is where the current branch diverged from the default.
        # It is resolved before staging so a failure leaves the index untouched.
        merge_base = _run_git("merge-base", "HEAD", f"origin/{default_branch}").strip()
        return _changes_against(merge_base, "HEAD")
    except _GIT_ERRORS:
        return []


def get_changed_files_default_tip() -> list[str]:
    """
    Get files changed compared to the tip of the default branch.

    This does a two-dot diff (``origin/<default branch>..HEAD``) and adds staged
    and untracked files.

    Returns:
        Sorted list of changed file paths; empty if git fails or is unavailable.
    """
    default_branch = get_default_branch()

    try:
        return _changes_against(f"origin/{default_branch}..HEAD")
    except _GIT_ERRORS:
        return []


def get_changed_files_prompt() -> list[str]:
    """
    Get files changed since the prompt was submitted.

    The currently staged file list is compared with the baseline stored in
    ``.deepwork/.last_work_tree`` (written by capture_prompt_work_tree.sh).
    Only paths that are absent from the baseline are reported, so a file that
    is already listed in the baseline is not reported again even if it was
    edited further.

    Returns:
        Sorted list of changed file paths. All current changes are returned
        when no baseline file exists; the list is empty on git or I/O errors.
    """
    baseline_path = Path(".deepwork/.last_work_tree")

    try:
        # Stage everything so new and modified files are visible via --cached.
        _stage_all_changes()
        current_files = _split_paths(
            _run_git("diff", "--name-only", "--cached", check=False)
        )

        if not baseline_path.exists():
            # No baseline, return all current changes
            return sorted(current_files)

        baseline_files = _split_paths(baseline_path.read_text())
        return sorted(current_files - baseline_files)
    except _GIT_ERRORS:
        return []


def get_changed_files_for_mode(mode: str) -> list[str]:
    """
    Get changed files for a specific compare_to mode.

    Args:
        mode: One of 'base', 'default_tip', or 'prompt'. Any other value is
            treated as 'base'.

    Returns:
        List of changed file paths.
    """
    handlers = {
        "base": get_changed_files_base,
        "default_tip": get_changed_files_default_tip,
        "prompt": get_changed_files_prompt,
    }
    # Unknown mode, fall back to base
    return handlers.get(mode, get_changed_files_base)()
@@ -87,23 +298,9 @@ def main() -> None: required=True, help="Path to .deepwork.policy.yml file", ) - parser.add_argument( - "--changed-files", - type=str, - required=True, - help="Newline-separated list of changed files", - ) args = parser.parse_args() - # Parse changed files (newline-separated) - changed_files = [f.strip() for f in args.changed_files.split("\n") if f.strip()] - - if not changed_files: - # No files changed, nothing to evaluate - print("{}") - return - # Check if policy file exists policy_path = Path(args.policy_file) if not policy_path.exists(): @@ -122,7 +319,7 @@ def main() -> None: # Extract promise tags from conversation promised_policies = extract_promise_tags(conversation_context) - # Parse and evaluate policies + # Parse policies try: policies = parse_policy_file(policy_path) except PolicyParseError as e: @@ -136,8 +333,28 @@ def main() -> None: print("{}") return - # Evaluate which policies fire - fired_policies = evaluate_policies(policies, changed_files, promised_policies) + # Group policies by compare_to mode to minimize git calls + policies_by_mode: dict[str, list[Policy]] = {} + for policy in policies: + mode = policy.compare_to + if mode not in policies_by_mode: + policies_by_mode[mode] = [] + policies_by_mode[mode].append(policy) + + # Get changed files for each mode and evaluate policies + fired_policies: list[Policy] = [] + for mode, mode_policies in policies_by_mode.items(): + changed_files = get_changed_files_for_mode(mode) + if not changed_files: + continue + + for policy in mode_policies: + # Skip if already promised + if policy.name in promised_policies: + continue + # Evaluate this policy + if evaluate_policy(policy, changed_files): + fired_policies.append(policy) if not fired_policies: # No policies fired diff --git a/src/deepwork/schemas/policy_schema.py b/src/deepwork/schemas/policy_schema.py index 7c12ac28..5aa6ae89 100644 --- a/src/deepwork/schemas/policy_schema.py +++ b/src/deepwork/schemas/policy_schema.py @@ -58,6 
+58,16 @@ "minLength": 1, "description": "Path to a file containing instructions (alternative to inline instructions)", }, + "compare_to": { + "type": "string", + "enum": ["base", "default_tip", "prompt"], + "description": ( + "What to compare against when detecting changed files. " + "'base' (default) compares to the base of the current branch. " + "'default_tip' compares to the tip of the default branch. " + "'prompt' compares to the state at the start of the prompt." + ), + }, }, "oneOf": [ {"required": ["instructions"]}, diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/capture_work_tree.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh similarity index 78% rename from src/deepwork/standard_jobs/deepwork_policy/hooks/capture_work_tree.sh rename to src/deepwork/standard_jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh index 04d9a972..a3323e42 100755 --- a/src/deepwork/standard_jobs/deepwork_policy/hooks/capture_work_tree.sh +++ b/src/deepwork/standard_jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh @@ -1,9 +1,10 @@ #!/bin/bash -# capture_work_tree.sh - Captures the current git work tree state +# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission # # This script creates a snapshot of the current git state by recording # all files that have been modified, added, or deleted. This baseline -# is used later to detect what changed during an agent session. +# is used for policies with compare_to: prompt to detect what changed +# during an agent response (between user prompts). 
set -e diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/get_changed_files.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/get_changed_files.sh deleted file mode 100755 index 03f70d9e..00000000 --- a/src/deepwork/standard_jobs/deepwork_policy/hooks/get_changed_files.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# get_changed_files.sh - Gets files that changed since the last work tree capture -# -# This script compares the current git state against the baseline captured -# at the start of the session to determine what files were modified. - -set -e - -# Stage all current changes -git add -A 2>/dev/null || true - -# Get current state -current_files=$(git diff --name-only HEAD 2>/dev/null || echo "") -untracked=$(git ls-files --others --exclude-standard 2>/dev/null || echo "") - -# Combine and deduplicate current files -all_current=$(echo -e "${current_files}\n${untracked}" | sort -u | grep -v '^$' || true) - -if [ -f .deepwork/.last_work_tree ]; then - # Compare with baseline - files that are new or different - # Get files in current that weren't in baseline - last_files=$(cat .deepwork/.last_work_tree 2>/dev/null || echo "") - - # Output files that are in current state - # This includes both newly changed files and files that were already changed - echo "${all_current}" -else - # No baseline exists - return all currently changed files - echo "${all_current}" -fi diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh index 6d598a3e..b12d456c 100755 --- a/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh +++ b/src/deepwork/standard_jobs/deepwork_policy/hooks/policy_stop_hook.sh @@ -2,16 +2,13 @@ # policy_stop_hook.sh - Evaluates policies when the agent stops # # This script is called as a Claude Code Stop hook. It: -# 1. Gets the list of files changed during the session -# 2. Evaluates policies from .deepwork.policy.yml +# 1. 
Evaluates policies from .deepwork.policy.yml +# 2. Computes changed files based on each policy's compare_to setting # 3. Checks for tags in the conversation transcript # 4. Returns JSON to block stop if policies need attention -# 5. Resets the work tree baseline for the next iteration set -e -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - # Check if policy file exists if [ ! -f .deepwork.policy.yml ]; then # No policies defined, nothing to do @@ -31,16 +28,6 @@ if [ -n "${HOOK_INPUT}" ]; then TRANSCRIPT_PATH=$(echo "${HOOK_INPUT}" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") fi -# Get changed files -changed_files=$("${SCRIPT_DIR}/get_changed_files.sh" 2>/dev/null || echo "") - -# If no files changed, nothing to evaluate -if [ -z "${changed_files}" ]; then - # Reset baseline for next iteration - "${SCRIPT_DIR}/capture_work_tree.sh" 2>/dev/null || true - exit 0 -fi - # Extract conversation text from the JSONL transcript # The transcript is JSONL format - each line is a JSON object # We need to extract the text content from assistant messages @@ -57,16 +44,13 @@ fi # Call the Python evaluator # The Python module handles: # - Parsing the policy file +# - Computing changed files based on each policy's compare_to setting # - Matching changed files against triggers/safety patterns # - Checking for promise tags in the conversation context # - Generating appropriate JSON output result=$(echo "${conversation_context}" | python -m deepwork.hooks.evaluate_policies \ --policy-file .deepwork.policy.yml \ - --changed-files "${changed_files}" \ 2>/dev/null || echo '{}') -# Reset the work tree baseline for the next iteration -"${SCRIPT_DIR}/capture_work_tree.sh" 2>/dev/null || true - # Output the result (JSON for Claude Code hooks) echo "${result}" diff --git a/src/deepwork/standard_jobs/deepwork_policy/hooks/user_prompt_submit.sh b/src/deepwork/standard_jobs/deepwork_policy/hooks/user_prompt_submit.sh index 970be76c..486ad836 100755 --- 
a/src/deepwork/standard_jobs/deepwork_policy/hooks/user_prompt_submit.sh +++ b/src/deepwork/standard_jobs/deepwork_policy/hooks/user_prompt_submit.sh @@ -1,17 +1,16 @@ #!/bin/bash # user_prompt_submit.sh - Runs on every user prompt submission # -# This script captures the work tree baseline if it doesn't exist yet. -# This ensures we have a baseline to compare against when evaluating policies. +# This script captures the work tree state at each prompt submission. +# This baseline is used for policies with compare_to: prompt to detect +# what changed during an agent response. set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Only capture if no baseline exists yet (first prompt of session) -if [ ! -f .deepwork/.last_work_tree ]; then - "${SCRIPT_DIR}/capture_work_tree.sh" -fi +# Capture work tree state at each prompt for compare_to: prompt policies +"${SCRIPT_DIR}/capture_prompt_work_tree.sh" # Exit successfully - don't block the prompt exit 0 diff --git a/src/deepwork/standard_jobs/deepwork_policy/steps/define.md b/src/deepwork/standard_jobs/deepwork_policy/steps/define.md index 0a47e55b..85c2b631 100644 --- a/src/deepwork/standard_jobs/deepwork_policy/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_policy/steps/define.md @@ -56,6 +56,22 @@ If there are files that, when also changed, mean the policy shouldn't fire: - Trigger: `src/auth/**/*` - Safety: `SECURITY.md`, `docs/security_review.md` +### Step 3b: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch. +- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production. 
+- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response. + +**When to use each:** +- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review +- **default_tip**: For policies about what's different from production/main +- **prompt**: For policies that should only consider very recent changes within the current session + +Most policies should use the default (`base`) and don't need to specify `compare_to`. + ### Step 4: Write the Instructions Create clear, actionable instructions for what the agent should do when the policy fires. @@ -86,6 +102,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" # or array: ["pattern1", "pattern2"] safety: "[glob pattern]" # optional, or array + compare_to: "base" # optional: "base" (default), "default_tip", or "prompt" instructions: | [Multi-line instructions for the agent...] ``` @@ -95,6 +112,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" safety: "[glob pattern]" + compare_to: "base" # optional instructions_file: "path/to/instructions.md" ``` @@ -166,7 +184,10 @@ Create or update this file at the project root with the new policy entry. ## Context Policies are evaluated automatically when you finish working on a task. The system: -1. Tracks which files you changed during the session +1. Determines which files have changed based on each policy's `compare_to` setting: + - `base` (default): Files changed since the branch diverged from main/master + - `default_tip`: Files different from the current main/master branch + - `prompt`: Files changed since the last prompt submission 2. Checks if any changes match policy trigger patterns 3. Skips policies where safety patterns also matched 4. 
Prompts you with instructions for any triggered policies diff --git a/tests/unit/test_policy_parser.py b/tests/unit/test_policy_parser.py index 031c506e..80eedbb1 100644 --- a/tests/unit/test_policy_parser.py +++ b/tests/unit/test_policy_parser.py @@ -5,6 +5,7 @@ import pytest from deepwork.core.policy_parser import ( + DEFAULT_COMPARE_TO, Policy, PolicyParseError, evaluate_policies, @@ -114,6 +115,54 @@ def test_from_dict_instructions_file_without_base_dir(self) -> None: with pytest.raises(PolicyParseError, match="no base_dir provided"): Policy.from_dict(data, base_dir=None) + def test_from_dict_compare_to_defaults_to_base(self) -> None: + """Test that compare_to defaults to 'base'.""" + data = { + "name": "Test", + "trigger": "src/*", + "instructions": "Check it", + } + policy = Policy.from_dict(data) + + assert policy.compare_to == DEFAULT_COMPARE_TO + assert policy.compare_to == "base" + + def test_from_dict_compare_to_explicit_base(self) -> None: + """Test explicit compare_to: base.""" + data = { + "name": "Test", + "trigger": "src/*", + "instructions": "Check it", + "compare_to": "base", + } + policy = Policy.from_dict(data) + + assert policy.compare_to == "base" + + def test_from_dict_compare_to_default_tip(self) -> None: + """Test compare_to: default_tip.""" + data = { + "name": "Test", + "trigger": "src/*", + "instructions": "Check it", + "compare_to": "default_tip", + } + policy = Policy.from_dict(data) + + assert policy.compare_to == "default_tip" + + def test_from_dict_compare_to_prompt(self) -> None: + """Test compare_to: prompt.""" + data = { + "name": "Test", + "trigger": "src/*", + "instructions": "Check it", + "compare_to": "prompt", + } + policy = Policy.from_dict(data) + + assert policy.compare_to == "prompt" + class TestMatchesPattern: """Tests for matches_pattern function."""