Unsupervisedcom · nhorton · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/tests/shell_script_tests/README.md b/tests/shell_script_tests/README.md
@@ -0,0 +1,76 @@
+# Shell Script Tests
+
+Automated tests for DeepWork shell scripts, with a focus on validating Claude Code hooks JSON response formats.
+
+## Scripts Tested
+
+| Script | Type | Description |
+|--------|------|-------------|
+| `policy_stop_hook.sh` | Stop Hook | Evaluates policies and blocks agent stop if policies are triggered |
+| `user_prompt_submit.sh` | UserPromptSubmit Hook | Captures work tree state when user submits a prompt |
+| `capture_prompt_work_tree.sh` | Helper | Records current git state for `compare_to: prompt` policies |
+| `make_new_job.sh` | Utility | Creates directory structure for new DeepWork jobs |
+
+## Claude Code Hooks JSON Format
+
+Hook scripts must emit valid JSON whenever they write to stdout. The tests enforce these formats:
+
+### Stop Hooks (`hooks.after_agent`)
+```jsonc
+{}                                          // Allow stop
+{"decision": "block", "reason": "..."}      // Block stop with reason
+```
+
+### UserPromptSubmit Hooks (`hooks.before_prompt`)
+```jsonc
+{}    // No output or empty object (side-effect only hooks)
+```
+
+### All Hooks
+- Must return valid JSON if producing output
+- Non-JSON output on stdout is **not allowed** (stderr is ok)
+- Exit code 0 indicates success (even when blocking)
+
+## Running Tests
+
+```bash
+# Run all shell script tests
+uv run pytest tests/shell_script_tests/ -v
+
+# Run tests for a specific script
+uv run pytest tests/shell_script_tests/test_policy_stop_hook.py -v
+
+# Run with coverage
+uv run pytest tests/shell_script_tests/ --cov=src/deepwork
+```
+
+## Test Structure
+
+```
+tests/shell_script_tests/
+├── conftest.py                      # Shared fixtures and helpers
+├── test_policy_stop_hook.py         # Stop hook blocking/allowing tests
+├── test_user_prompt_submit.py       # Prompt submission hook tests
+├── test_capture_prompt_work_tree.py # Work tree capture tests
+├── test_hooks_json_format.py        # JSON format validation tests
+└── test_make_new_job.py             # Job directory creation tests
+```
+
+## Shared Fixtures
+
+Available in `conftest.py`:
+
+| Fixture | Description |
+|---------|-------------|
+| `git_repo` | Basic git repo with initial commit |
+| `git_repo_with_policy` | Git repo with a Python file policy |
+| `policy_hooks_dir` | Path to policy hooks scripts |
+| `jobs_scripts_dir` | Path to job management scripts |
+
+## Adding New Tests
+
+1. Use shared fixtures from `conftest.py` when possible
+2. Use `run_shell_script()` helper for running scripts
+3. Validate JSON output with `validate_json_output()` and `validate_stop_hook_response()`
+4. Test both success and failure cases
+5. Verify exit codes (hooks should exit 0 even when blocking)
diff --git a/tests/shell_script_tests/conftest.py b/tests/shell_script_tests/conftest.py
@@ -0,0 +1,115 @@
+"""Shared fixtures for shell script tests."""
+
+import json
+import os
+import subprocess
+from pathlib import Path
+
+import pytest
+from git import Repo
+
+
+@pytest.fixture
+def git_repo(tmp_path: Path) -> Path:
+    """Create a basic git repo for testing."""
+    repo = Repo.init(tmp_path)
+
+    readme = tmp_path / "README.md"
+    readme.write_text("# Test Project\n")
+    repo.index.add(["README.md"])
+    repo.index.commit("Initial commit")
+
+    return tmp_path
+
+
+@pytest.fixture
+def git_repo_with_policy(tmp_path: Path) -> Path:
+    """Create a git repo with policy that will fire."""
+    repo = Repo.init(tmp_path)
+
+    readme = tmp_path / "README.md"
+    readme.write_text("# Test Project\n")
+    repo.index.add(["README.md"])
+    repo.index.commit("Initial commit")
+
+    # Policy that triggers on any Python file
+    policy_file = tmp_path / ".deepwork.policy.yml"
+    policy_file.write_text(
+        """- name: "Python File Policy"
+  trigger: "**/*.py"
+  compare_to: prompt
+  instructions: |
+    Review Python files for quality.
+"""
+    )
+
+    # Empty baseline so new files trigger
+    deepwork_dir = tmp_path / ".deepwork"
+    deepwork_dir.mkdir(exist_ok=True)
+    (deepwork_dir / ".last_work_tree").write_text("")
+
+    return tmp_path
+
+
+@pytest.fixture
+def policy_hooks_dir() -> Path:
+    """Return the path to the policy hooks scripts directory."""
+    return (
+        Path(__file__).parent.parent.parent
+        / "src"
+        / "deepwork"
+        / "standard_jobs"
+        / "deepwork_policy"
+        / "hooks"
+    )
+
+
+@pytest.fixture
+def jobs_scripts_dir() -> Path:
+    """Return the path to the jobs scripts directory."""
+    return (
+        Path(__file__).parent.parent.parent / "src" / "deepwork" / "standard_jobs" / "deepwork_jobs"
+    )
+
+
+def run_shell_script(
+    script_path: Path,
+    cwd: Path,
+    args: list[str] | None = None,
+    hook_input: dict | None = None,
+    env_extra: dict[str, str] | None = None,
+) -> tuple[str, str, int]:
+    """
+    Run a shell script and return its output.
+
+    Args:
+        script_path: Path to the shell script
+        cwd: Working directory to run the script in
+        args: Optional list of arguments to pass to the script
+        hook_input: Optional JSON input to pass via stdin
+        env_extra: Optional extra environment variables
+
+    Returns:
+        Tuple of (stdout, stderr, return_code)
+    """
+    env = os.environ.copy()
+    env["PYTHONPATH"] = str(Path(__file__).parent.parent.parent / "src")
+    if env_extra:
+        env.update(env_extra)
+
+    cmd = ["bash", str(script_path)]
+    if args:
+        cmd.extend(args)
+
+    stdin_data = json.dumps(hook_input) if hook_input else ""
+
+    result = subprocess.run(
+        cmd,
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+        input=stdin_data,
+        env=env,
+    )
+
+    return result.stdout, result.stderr, result.returncode