From c90aeca69018134f7b97b272e7d0ab8251fca045 Mon Sep 17 00:00:00 2001
From: Noah Horton <noah@unsupervised.com>
Date: Wed, 14 Jan 2026 17:30:29 -0700
Subject: [PATCH 1/8] Fix CLA signatures file format

Replace Markdown with JSON array - CLA Assistant action
expects JSON format for storing signatures.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CLA/version_1/CLA_SIGNATORIES.md | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
index 12f2c5e1..fe51488c 100644
--- a/CLA/version_1/CLA_SIGNATORIES.md
+++ b/CLA/version_1/CLA_SIGNATORIES.md
@@ -1,25 +1 @@
-# CLA Signatories
-
-This file tracks individual contributors who have signed the Contributor License Agreement (CLA) for the DeepWork project.
-
-## Individual Contributors
-
-| GitHub Username | Date Signed | Signature Method |
-|-----------------|-------------|------------------|
-| <!-- Add your GitHub username here --> | | |
-
----
-
-## How to Sign
-
-When you submit your first pull request, the CLA Assistant bot will guide you through signing the CLA electronically by commenting on your PR.
-
----
-
-## Corporate Contributors
-
-Organizations that have signed the Corporate CLA are tracked separately. If you are contributing on behalf of your employer, please ensure your organization has signed the Corporate CLA by contacting legal@unsupervised.com.
-
----
-
-For questions about the CLA, see [CLA.md](CLA.md) or contact legal@unsupervised.com.
+[]

From ee96abec4695638e3d01109337b7802e386d3d5c Mon Sep 17 00:00:00 2001
From: Noah Horton <noah@unsupervised.com>
Date: Wed, 14 Jan 2026 17:31:48 -0700
Subject: [PATCH 2/8] Fix CLA signatures JSON structure

CLA Assistant expects { "signedContributors": [] } format,
not a plain array.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CLA/version_1/CLA_SIGNATORIES.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
index fe51488c..6d978e7e 100644
--- a/CLA/version_1/CLA_SIGNATORIES.md
+++ b/CLA/version_1/CLA_SIGNATORIES.md
@@ -1 +1,3 @@
-[]
+{
+  "signedContributors": []
+}

From f16d9006dc245e1e132f9bb3532aa01b55f1cbcd Mon Sep 17 00:00:00 2001
From: Noah Horton <noah@unsupervised.com>
Date: Wed, 14 Jan 2026 17:32:51 -0700
Subject: [PATCH 3/8] Remove manually created signatures file

Per CLA Assistant docs: "You do not need to create this file manually.
Our workflow will create the signature file if it does not already exist.
Manually creating this file will cause the workflow to fail."

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CLA/version_1/CLA_SIGNATORIES.md | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 CLA/version_1/CLA_SIGNATORIES.md

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
deleted file mode 100644
index 6d978e7e..00000000
--- a/CLA/version_1/CLA_SIGNATORIES.md
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "signedContributors": []
-}

From 1463faa70f47f76f278381f22bb3100709137a43 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 15 Jan 2026 00:33:23 +0000
Subject: [PATCH 4/8] Creating file for storing CLA Signatures

---
 CLA/version_1/CLA_SIGNATORIES.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 CLA/version_1/CLA_SIGNATORIES.md

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
new file mode 100644
index 00000000..18d5487f
--- /dev/null
+++ b/CLA/version_1/CLA_SIGNATORIES.md
@@ -0,0 +1,3 @@
+{
+   "signedContributors": []
+}
\ No newline at end of file

From 8d1a14505dd62037f89042a65e692cada61b9597 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 15 Jan 2026 00:57:27 +0000
Subject: [PATCH 5/8] @nhorton has signed the CLA in
 Unsupervisedcom/deepwork#27

---
 CLA/version_1/CLA_SIGNATORIES.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
index 18d5487f..20672347 100644
--- a/CLA/version_1/CLA_SIGNATORIES.md
+++ b/CLA/version_1/CLA_SIGNATORIES.md
@@ -1,3 +1,12 @@
 {
-   "signedContributors": []
+  "signedContributors": [
+    {
+      "name": "nhorton",
+      "id": 204146,
+      "comment_id": 3752380523,
+      "created_at": "2026-01-15T00:57:16Z",
+      "repoId": 1132406094,
+      "pullRequestNo": 27
+    }
+  ]
 }
\ No newline at end of file

From 9565e3ba7b368c3ad3bcff2b7bb984850ef46c70 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 15 Jan 2026 08:27:57 +0000
Subject: [PATCH 6/8] @tylerwillis has signed the CLA in
 Unsupervisedcom/deepwork#31

---
 CLA/version_1/CLA_SIGNATORIES.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CLA/version_1/CLA_SIGNATORIES.md b/CLA/version_1/CLA_SIGNATORIES.md
index 20672347..61038a69 100644
--- a/CLA/version_1/CLA_SIGNATORIES.md
+++ b/CLA/version_1/CLA_SIGNATORIES.md
@@ -7,6 +7,14 @@
       "created_at": "2026-01-15T00:57:16Z",
       "repoId": 1132406094,
       "pullRequestNo": 27
+    },
+    {
+      "name": "tylerwillis",
+      "id": 50716,
+      "comment_id": 3753520846,
+      "created_at": "2026-01-15T08:27:44Z",
+      "repoId": 1132406094,
+      "pullRequestNo": 31
     }
   ]
 }
\ No newline at end of file

From 5f18ddde99d50df26e89662fddc5ce4c0c00ce16 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 15 Jan 2026 21:49:22 +0000
Subject: [PATCH 7/8] Add automated CI test for Claude Code integration

Add a deterministic 'fruits' test job and comprehensive CI tests to validate
that deepwork-generated commands work correctly with Claude Code.

Changes:
- Add fruits job fixture (identify + classify steps) for CI testing
- Add integration tests for fruits workflow (8 tests)
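- Add e2e tests: 3 command-generation tests that always run, plus 3 Claude
  Code execution tests (skipped unless ANTHROPIC_API_KEY is set,
  DEEPWORK_E2E_FULL=true, and the claude CLI is installed)
- Add GitHub Actions workflow for automated testing:
  - validate-generation: Always runs, tests command generation
  - claude-code-e2e: Runs only on manual or scheduled triggers, and only when
    ANTHROPIC_API_KEY is available; tests actual execution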

The fruits job is designed to be deterministic:
- Input: comma-separated list of items (e.g., "apple, car, banana")
- Step 1: Identify which items are fruits
- Step 2: Classify fruits by category (citrus, tropical, etc.)
---
 .github/workflows/claude-code-test.yml       | 213 ++++++++++++
 tests/e2e/__init__.py                        |   1 +
 tests/e2e/test_claude_code_integration.py    | 325 +++++++++++++++++++
 tests/fixtures/jobs/fruits/job.yml           |  40 +++
 tests/fixtures/jobs/fruits/steps/classify.md |  83 +++++
 tests/fixtures/jobs/fruits/steps/identify.md |  64 ++++
 tests/integration/test_fruits_workflow.py    | 189 +++++++++++
 7 files changed, 915 insertions(+)
 create mode 100644 .github/workflows/claude-code-test.yml
 create mode 100644 tests/e2e/__init__.py
 create mode 100644 tests/e2e/test_claude_code_integration.py
 create mode 100644 tests/fixtures/jobs/fruits/job.yml
 create mode 100644 tests/fixtures/jobs/fruits/steps/classify.md
 create mode 100644 tests/fixtures/jobs/fruits/steps/identify.md
 create mode 100644 tests/integration/test_fruits_workflow.py

diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml
new file mode 100644
index 00000000..bafc2592
--- /dev/null
+++ b/.github/workflows/claude-code-test.yml
@@ -0,0 +1,213 @@
+name: Claude Code Integration Test
+
+on:
+  # Manual trigger for testing
+  workflow_dispatch:
+    inputs:
+      debug:
+        description: 'Enable debug logging'
+        required: false
+        default: false
+        type: boolean
+  # Run on PRs against any base branch ('**' also matches names containing '/') that modify core code
+  pull_request:
+    branches: ["**"]
+    paths:
+      - 'src/deepwork/**'
+      - 'tests/**'
+      - '.github/workflows/claude-code-test.yml'
+  # Scheduled run for continuous validation
+  schedule:
+    - cron: '0 6 * * 1'  # Weekly on Monday at 6 AM UTC
+
+jobs:
+  # Job 1: Validate command generation (always runs)
+  validate-generation:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: uv sync --extra dev
+
+      - name: Run fruits workflow tests
+        run: uv run pytest tests/integration/test_fruits_workflow.py -v
+
+      - name: Generate commands and validate structure
+        run: |
+          # Create a test environment
+          mkdir -p test_project/.deepwork/jobs
+          cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
+
+          # Initialise a git repository inside the test project
+          cd test_project
+          git init
+          git config user.email "test@test.com"
+          git config user.name "Test"
+          echo "# Test" > README.md
+          git add . && git commit -m "init"
+
+          # Run deepwork sync in test_project; --project reuses the parent env without changing cwd
+          uv run --project .. deepwork sync
+
+          # Validate generated commands exist
+          echo "Checking generated commands..."
+          ls -la .claude/commands/
+
+          # Verify command files exist
+          test -f .claude/commands/fruits.identify.md || (echo "Missing fruits.identify.md" && exit 1)
+          test -f .claude/commands/fruits.classify.md || (echo "Missing fruits.classify.md" && exit 1)
+
+          # Verify command content
+          grep -q "# fruits.identify" .claude/commands/fruits.identify.md
+          grep -q "raw_items" .claude/commands/fruits.identify.md
+          grep -q "identified_fruits.md" .claude/commands/fruits.identify.md
+
+          grep -q "# fruits.classify" .claude/commands/fruits.classify.md
+          grep -q "identified_fruits.md" .claude/commands/fruits.classify.md
+          grep -q "classified_fruits.md" .claude/commands/fruits.classify.md
+
+          echo "Command generation validated successfully!"
+
+  # Job 2: End-to-end test with Claude Code (manual/scheduled runs only; steps are skipped without an API key)
+  claude-code-e2e:
+    runs-on: ubuntu-latest
+    needs: validate-generation
+    if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Check for API key
+        id: check-key
+        run: |
+          if [ -z "$ANTHROPIC_API_KEY" ]; then
+            echo "has_key=false" >> $GITHUB_OUTPUT
+            echo "::warning::ANTHROPIC_API_KEY not set, skipping Claude Code e2e test"
+          else
+            echo "has_key=true" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Install Node.js (for Claude Code CLI)
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      - name: Install Claude Code CLI
+        if: steps.check-key.outputs.has_key == 'true'
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Install uv
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Python
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install deepwork
+        if: steps.check-key.outputs.has_key == 'true'
+        run: uv sync
+
+      - name: Set up test project
+        if: steps.check-key.outputs.has_key == 'true'
+        run: |
+          mkdir -p test_project/.deepwork/jobs
+          cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
+
+          cd test_project
+          git init
+          git config user.email "test@test.com"
+          git config user.name "Test"
+          echo "# CI Test Project" > README.md
+          git add . && git commit -m "init"
+
+          # Generate commands in test_project; --project reuses the parent env without changing cwd
+          uv run --project .. deepwork sync
+
+          echo "Test project setup complete"
+          ls -la .claude/commands/
+
+      - name: Run Claude Code - Identify Step
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 5
+        run: |
+          # Run the identify step with a deterministic input
+          # --print runs non-interactively; permission prompts are skipped so the output file can be written
+          claude --dangerously-skip-permissions --print "/fruits.identify" <<EOF
+          raw_items: apple, car, banana, chair, orange, table, mango, laptop
+          EOF
+
+          # Verify output was created
+          if [ -f "identified_fruits.md" ]; then
+            echo "Identify step completed successfully!"
+            echo "--- Output ---"
+            cat identified_fruits.md
+          else
+            echo "ERROR: identified_fruits.md was not created"
+            exit 1
+          fi
+
+      - name: Run Claude Code - Classify Step
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 5
+        run: |
+          # Run the classify step non-interactively (reads identified_fruits.md from the previous step)
+          claude --dangerously-skip-permissions --print "/fruits.classify"
+
+          # Verify output was created
+          if [ -f "classified_fruits.md" ]; then
+            echo "Classify step completed successfully!"
+            echo "--- Output ---"
+            cat classified_fruits.md
+          else
+            echo "ERROR: classified_fruits.md was not created"
+            exit 1
+          fi
+
+      - name: Validate outputs
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        run: |
+          echo "=== Validating outputs ==="
+
+          # Check identified_fruits.md contains expected fruits
+          echo "Checking identified_fruits.md..."
+          grep -qi "apple" identified_fruits.md || (echo "Missing: apple" && exit 1)
+          grep -qi "banana" identified_fruits.md || (echo "Missing: banana" && exit 1)
+          grep -qi "orange" identified_fruits.md || (echo "Missing: orange" && exit 1)
+          grep -qi "mango" identified_fruits.md || (echo "Missing: mango" && exit 1)
+
+          # Check classified_fruits.md has expected structure
+          echo "Checking classified_fruits.md..."
+          grep -qi "citrus\|tropical\|pome" classified_fruits.md || (echo "Missing fruit categories" && exit 1)
+
+          echo "All validations passed!"
+
+      - name: Upload test artifacts
+        if: steps.check-key.outputs.has_key == 'true' && always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: claude-code-test-outputs
+          path: |
+            test_project/identified_fruits.md
+            test_project/classified_fruits.md
+          retention-days: 7
diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py
new file mode 100644
index 00000000..7adc5273
--- /dev/null
+++ b/tests/e2e/__init__.py
@@ -0,0 +1 @@
+"""End-to-end tests for DeepWork with Claude Code."""
diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py
new file mode 100644
index 00000000..0f75fac5
--- /dev/null
+++ b/tests/e2e/test_claude_code_integration.py
@@ -0,0 +1,325 @@
+"""End-to-end tests for DeepWork with Claude Code integration.
+
+These tests validate that DeepWork-generated commands work correctly
+with Claude Code. The tests can run in two modes:
+
+1. **Generation-only mode** (default): Tests command generation and structure
+2. **Full e2e mode**: Actually executes commands with Claude Code
+
+Set ANTHROPIC_API_KEY and DEEPWORK_E2E_FULL=true, with the claude CLI installed, to run full e2e tests.
+"""
+
+import os
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from deepwork.core.adapters import ClaudeAdapter
+from deepwork.core.generator import CommandGenerator
+from deepwork.core.parser import parse_job_definition
+
+# Test input for deterministic validation
+TEST_INPUT = "apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle"
+
+# Expected fruits from test input (for validation)
+EXPECTED_FRUITS = {"apple", "banana", "orange", "mango", "grape"}
+
+
+def has_claude_code() -> bool:
+    """Return True if `claude --version` can be run and exits 0 within 10 seconds."""
+    try:
+        result = subprocess.run(
+            ["claude", "--version"],
+            capture_output=True,
+            timeout=10,
+        )
+        return result.returncode == 0
+    except (OSError, subprocess.TimeoutExpired):  # OSError also covers a non-executable binary
+        return False
+
+
+def has_api_key() -> bool:
+    """Return True when ANTHROPIC_API_KEY is present in the environment and non-empty."""
+    return os.environ.get("ANTHROPIC_API_KEY", "") != ""
+
+
+def run_full_e2e() -> bool:
+    """Return True only when full e2e is opted into and both the API key and CLI exist."""
+    opted_in = os.environ.get("DEEPWORK_E2E_FULL", "").lower() == "true"
+    if not opted_in:
+        return False
+    # Environment check first; the CLI probe spawns a subprocess and runs last.
+    return has_api_key() and has_claude_code()
+
+
+class TestCommandGenerationE2E:
+    """End-to-end tests for command generation."""
+
+    def test_generate_fruits_commands_in_temp_project(self) -> None:
+        """Generate the fruits commands inside a temporary project and check files and content."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            project_dir = Path(tmpdir)
+
+            # Set up project structure
+            deepwork_dir = project_dir / ".deepwork" / "jobs"
+            deepwork_dir.mkdir(parents=True)
+
+            # Copy fruits job fixture
+            fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
+            shutil.copytree(fixtures_dir, deepwork_dir / "fruits")
+
+            # Initialize a git repo to mimic a real project (exit codes are not checked)
+            subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
+            subprocess.run(
+                ["git", "config", "user.email", "test@test.com"],
+                cwd=project_dir,
+                capture_output=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.name", "Test"],
+                cwd=project_dir,
+                capture_output=True,
+            )
+
+            # Parse job and generate commands
+            job = parse_job_definition(deepwork_dir / "fruits")
+            generator = CommandGenerator()
+            adapter = ClaudeAdapter()
+
+            commands_dir = project_dir / ".claude"
+            commands_dir.mkdir()
+
+            command_paths = generator.generate_all_commands(job, adapter, commands_dir)
+
+            # Expect one generated command per job step (identify, classify)
+            assert len(command_paths) == 2
+
+            identify_cmd = commands_dir / "commands" / "fruits.identify.md"
+            classify_cmd = commands_dir / "commands" / "fruits.classify.md"
+
+            assert identify_cmd.exists()
+            assert classify_cmd.exists()
+
+            # Each command must name itself and mention its input and output
+            identify_content = identify_cmd.read_text()
+            assert "# fruits.identify" in identify_content
+            assert "raw_items" in identify_content
+            assert "identified_fruits.md" in identify_content
+
+            classify_content = classify_cmd.read_text()
+            assert "# fruits.classify" in classify_content
+            assert "identified_fruits.md" in classify_content
+            assert "classified_fruits.md" in classify_content
+
+    def test_command_structure_matches_claude_code_expectations(self) -> None:
+        """Check that the generated identify command contains the expected section headers."""
+        fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
+        job = parse_job_definition(fixtures_dir)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            commands_dir = Path(tmpdir) / ".claude"
+            commands_dir.mkdir()
+
+            generator = CommandGenerator()
+            adapter = ClaudeAdapter()
+            generator.generate_all_commands(job, adapter, commands_dir)
+
+            identify_cmd = commands_dir / "commands" / "fruits.identify.md"
+            content = identify_cmd.read_text()
+
+            # Sections the generated command file is expected to contain
+            assert "# fruits.identify" in content  # Command name header
+            assert "## Instructions" in content  # Instructions section
+            assert "## Inputs" in content  # Inputs section
+            assert "## Output" in content  # Output section
+
+            # The step's declared user input must be named in the command
+            assert "raw_items" in content
+
+    def test_dependency_chain_in_commands(self) -> None:
+        """Check that only the dependent step's command carries a Prerequisites section."""
+        fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
+        job = parse_job_definition(fixtures_dir)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            commands_dir = Path(tmpdir) / ".claude"
+            commands_dir.mkdir()
+
+            generator = CommandGenerator()
+            adapter = ClaudeAdapter()
+            generator.generate_all_commands(job, adapter, commands_dir)
+
+            # identify has no dependencies, so no Prerequisites section is expected
+            identify_cmd = commands_dir / "commands" / "fruits.identify.md"
+            identify_content = identify_cmd.read_text()
+            assert "## Prerequisites" not in identify_content
+
+            # classify depends on identify, so it must list it as a prerequisite
+            classify_cmd = commands_dir / "commands" / "fruits.classify.md"
+            classify_content = classify_cmd.read_text()
+            assert "## Prerequisites" in classify_content
+            assert "identify" in classify_content.lower()
+
+
+@pytest.mark.skipif(
+    not run_full_e2e(),
+    reason="Full e2e requires ANTHROPIC_API_KEY, DEEPWORK_E2E_FULL=true, and claude CLI",
+)
+class TestClaudeCodeExecution:
+    """End-to-end tests that actually execute with Claude Code.
+
+    These tests only run when:
+    - ANTHROPIC_API_KEY is set
+    - DEEPWORK_E2E_FULL=true
+    - Claude Code CLI is installed
+    """
+
+    @pytest.fixture
+    def project_with_commands(self) -> Path:
+        """Yield a temporary git project holding the fruits job and its generated commands."""
+        tmpdir = tempfile.mkdtemp()
+        project_dir = Path(tmpdir)
+
+        # Set up project structure
+        deepwork_dir = project_dir / ".deepwork" / "jobs"
+        deepwork_dir.mkdir(parents=True)
+
+        # Copy fruits job fixture
+        fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
+        shutil.copytree(fixtures_dir, deepwork_dir / "fruits")
+
+        # Initialize git repo (exit codes are not checked)
+        subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
+        subprocess.run(
+            ["git", "config", "user.email", "test@test.com"],
+            cwd=project_dir,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["git", "config", "user.name", "Test"],
+            cwd=project_dir,
+            capture_output=True,
+        )
+
+        # Create README and make the initial commit
+        (project_dir / "README.md").write_text("# Test Project\n")
+        subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "init"],
+            cwd=project_dir,
+            capture_output=True,
+        )
+
+        # Generate commands into <project>/.claude
+        job = parse_job_definition(deepwork_dir / "fruits")
+        generator = CommandGenerator()
+        adapter = ClaudeAdapter()
+
+        commands_dir = project_dir / ".claude"
+        commands_dir.mkdir()
+        generator.generate_all_commands(job, adapter, commands_dir)
+
+        yield project_dir
+
+        # Teardown: remove the temporary project, ignoring removal errors
+        shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_identify_step_execution(self, project_with_commands: Path) -> None:
+        """Run the identify step through the Claude Code CLI and check its output file."""
+        # Headless run; permission prompts are skipped so the step can write its output file
+        result = subprocess.run(
+            [
+                "claude",
+                "--dangerously-skip-permissions",
+                "--print",
+                f"/fruits.identify raw_items: {TEST_INPUT}",
+            ],
+            cwd=project_with_commands,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+
+        assert result.returncode == 0, f"Claude Code failed: {result.stderr}"
+
+        # Check output file was created
+        output_file = project_with_commands / "identified_fruits.md"
+        assert output_file.exists(), "identified_fruits.md was not created"
+
+        # Every expected fruit must appear (case-insensitively) in the output
+        content = output_file.read_text().lower()
+        for fruit in EXPECTED_FRUITS:
+            assert fruit in content, f"Expected fruit '{fruit}' not found in output"
+
+    def test_classify_step_execution(self, project_with_commands: Path) -> None:
+        """Run the classify step through the Claude Code CLI against a hand-written input file."""
+        # First, create the input file (simulate identify step output)
+        identify_output = project_with_commands / "identified_fruits.md"
+        identify_output.write_text(
+            "# Identified Fruits\n\n- apple\n- banana\n- orange\n- mango\n- grape\n"
+        )
+
+        # Headless run; permission prompts are skipped so the step can write its output file
+        result = subprocess.run(
+            ["claude", "--dangerously-skip-permissions", "--print", "/fruits.classify"],
+            cwd=project_with_commands,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+
+        assert result.returncode == 0, f"Claude Code failed: {result.stderr}"
+
+        # Check output file was created
+        output_file = project_with_commands / "classified_fruits.md"
+        assert output_file.exists(), "classified_fruits.md was not created"
+
+        # Validate content has category structure
+        content = output_file.read_text().lower()
+        # Should have at least one category mentioned
+        categories = ["citrus", "tropical", "pome", "berries", "grape"]
+        has_category = any(cat in content for cat in categories)
+        assert has_category, f"No fruit categories found in output: {content[:500]}"
+
+    def test_full_workflow_execution(self, project_with_commands: Path) -> None:
+        """Run identify then classify through the Claude Code CLI and check both outputs."""
+        # Run identify step (headless; permission prompts skipped so files can be written)
+        result1 = subprocess.run(
+            [
+                "claude",
+                "--dangerously-skip-permissions",
+                "--print",
+                f"/fruits.identify raw_items: {TEST_INPUT}",
+            ],
+            cwd=project_with_commands,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+        assert result1.returncode == 0, f"Identify step failed: {result1.stderr}"
+
+        # Verify identify output exists
+        identify_output = project_with_commands / "identified_fruits.md"
+        assert identify_output.exists(), "Identify step did not create output"
+
+        # Run classify step
+        result2 = subprocess.run(
+            ["claude", "--dangerously-skip-permissions", "--print", "/fruits.classify"],
+            cwd=project_with_commands,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+        assert result2.returncode == 0, f"Classify step failed: {result2.stderr}"
+
+        # Verify classify output exists
+        classify_output = project_with_commands / "classified_fruits.md"
+        assert classify_output.exists(), "Classify step did not create output"
+
+        # Validate final output quality
+        content = classify_output.read_text()
+        assert len(content) > 100, "Output seems too short"
+        assert "##" in content, "Output lacks markdown structure"
diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml
new file mode 100644
index 00000000..e1ce79a6
--- /dev/null
+++ b/tests/fixtures/jobs/fruits/job.yml
@@ -0,0 +1,40 @@
+name: fruits
+version: "1.0.0"
+summary: "Identify and classify fruits from a mixed list of items"
+description: |
+  A simple, deterministic job for CI testing of the DeepWork framework.
+
+  This job takes a list of mixed items (fruits and non-fruits) and:
+  1. Identifies which items are fruits
+  2. Classifies those fruits into categories (citrus, berries, tropical, etc.)
+
+  This workflow is designed to produce predictable, verifiable outputs
+  making it ideal for automated testing.
+
+changelog:
+  - version: "1.0.0"
+    changes: "Initial version for CI testing"
+
+steps:
+  - id: identify
+    name: "Identify Fruits"
+    description: "Filter a list of items to identify only the fruits"
+    instructions_file: steps/identify.md
+    inputs:
+      - name: raw_items
+        description: "Comma-separated list of items to filter (e.g., 'apple, car, banana, chair')"
+    outputs:
+      - identified_fruits.md
+    dependencies: []
+
+  - id: classify
+    name: "Classify Fruits"
+    description: "Organize identified fruits into categories"
+    instructions_file: steps/classify.md
+    inputs:
+      - file: identified_fruits.md
+        from_step: identify
+    outputs:
+      - classified_fruits.md
+    dependencies:
+      - identify
diff --git a/tests/fixtures/jobs/fruits/steps/classify.md b/tests/fixtures/jobs/fruits/steps/classify.md
new file mode 100644
index 00000000..dbd96c98
--- /dev/null
+++ b/tests/fixtures/jobs/fruits/steps/classify.md
@@ -0,0 +1,83 @@
+# Classify Fruits
+
+## Objective
+
+Organize the identified fruits into categories based on their type.
+
+## Task
+
+Read the `identified_fruits.md` file from the previous step and categorize each fruit.
+
+### Fruit Categories
+
+Use these standard categories:
+
+1. **Citrus** - orange, lemon, lime, grapefruit, tangerine, mandarin, clementine
+2. **Berries** - strawberry, blueberry, raspberry, blackberry, cranberry, mulberry
+3. **Tropical** - banana, mango, pineapple, papaya, coconut, kiwi, passion fruit
+4. **Stone Fruits** - peach, plum, cherry, apricot, nectarine, lychee
+5. **Pome Fruits** - apple, pear, quince
+6. **Melons** - watermelon, cantaloupe, honeydew, melon
+7. **Grapes** - grape, raisin
+
+If a fruit doesn't fit any category, list it under **Other**.
+
+## Output Format
+
+Create `classified_fruits.md` with the following format:
+
+```markdown
+# Classified Fruits
+
+## [Category Name]
+- [fruit1]
+- [fruit2]
+
+## [Another Category]
+- [fruit3]
+
+---
+
+## Summary
+
+| Category | Count |
+|----------|-------|
+| [category1] | X |
+| [category2] | Y |
+| **Total** | **Z** |
+```
+
+## Example
+
+If `identified_fruits.md` contains: apple, banana, orange
+
+Output should be:
+```markdown
+# Classified Fruits
+
+## Citrus
+- orange
+
+## Tropical
+- banana
+
+## Pome Fruits
+- apple
+
+---
+
+## Summary
+
+| Category | Count |
+|----------|-------|
+| Citrus | 1 |
+| Tropical | 1 |
+| Pome Fruits | 1 |
+| **Total** | **3** |
+```
+
+## Notes
+
+- Only include categories that have at least one fruit
+- Sort fruits alphabetically within each category
+- Ensure the summary table matches the categorized fruits
diff --git a/tests/fixtures/jobs/fruits/steps/identify.md b/tests/fixtures/jobs/fruits/steps/identify.md
new file mode 100644
index 00000000..20acddb9
--- /dev/null
+++ b/tests/fixtures/jobs/fruits/steps/identify.md
@@ -0,0 +1,64 @@
+# Identify Fruits
+
+## Objective
+
+Filter the provided list of items to identify only the fruits.
+
+## Task
+
+Given the input `{{raw_items}}`, create a markdown file listing only the items that are fruits.
+
+### Common Fruits Reference
+
+The following are considered fruits:
+- **Citrus**: orange, lemon, lime, grapefruit, tangerine, mandarin, clementine
+- **Berries**: strawberry, blueberry, raspberry, blackberry, cranberry, mulberry
+- **Tropical**: banana, mango, pineapple, papaya, coconut, kiwi, passion fruit
+- **Stone fruits**: peach, plum, cherry, apricot, nectarine, lychee
+- **Pome fruits**: apple, pear, quince
+- **Melons**: watermelon, cantaloupe, honeydew, melon
+- **Grapes**: grape, raisin
+
+### Instructions
+
+1. Parse the comma-separated list of items
+2. For each item, determine if it is a fruit
+3. Create a list of only the fruits found
+
+## Output Format
+
+Create `identified_fruits.md` with the following format:
+
+```markdown
+# Identified Fruits
+
+The following fruits were identified from the input list:
+
+- [fruit1]
+- [fruit2]
+- [fruit3]
+...
+
+## Summary
+
+Found X fruits from Y total items.
+```
+
+## Example
+
+If input is: `apple, car, banana, chair, orange, table`
+
+Output should be:
+```markdown
+# Identified Fruits
+
+The following fruits were identified from the input list:
+
+- apple
+- banana
+- orange
+
+## Summary
+
+Found 3 fruits from 6 total items.
+```
diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py
new file mode 100644
index 00000000..168c94d4
--- /dev/null
+++ b/tests/integration/test_fruits_workflow.py
@@ -0,0 +1,189 @@
+"""Integration tests for the fruits CI test workflow.
+
+This module tests the fruits job - a simple, deterministic workflow
+designed for automated CI testing of the DeepWork framework.
+"""
+
+from pathlib import Path
+
+from deepwork.core.adapters import ClaudeAdapter
+from deepwork.core.generator import CommandGenerator
+from deepwork.core.parser import parse_job_definition
+
+
+class TestFruitsWorkflow:
+    """Parse the `fruits` fixture job and check its steps and generated commands."""
+
+    def test_fruits_job_parses_correctly(self, fixtures_dir: Path) -> None:
+        """The parsed job is named `fruits`, has version 1.0.0, and two ordered steps."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        assert job.name == "fruits"
+        assert job.version == "1.0.0"
+        assert len(job.steps) == 2
+
+        # List equality also pins the order: identify first, then classify
+        step_ids = [step.id for step in job.steps]
+        assert step_ids == ["identify", "classify"]
+
+    def test_fruits_identify_step_structure(self, fixtures_dir: Path) -> None:
+        """The identify step takes one user input and produces identified_fruits.md."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        identify_step = job.steps[0]
+        assert identify_step.id == "identify"
+        assert identify_step.name == "Identify Fruits"
+
+        # Exactly one input, and it is the user-supplied `raw_items`
+        assert len(identify_step.inputs) == 1
+        assert identify_step.inputs[0].is_user_input()
+        assert identify_step.inputs[0].name == "raw_items"
+
+        # Sole declared output
+        assert identify_step.outputs == ["identified_fruits.md"]
+
+        # First step in the job, so it declares no dependencies
+        assert identify_step.dependencies == []
+
+    def test_fruits_classify_step_structure(self, fixtures_dir: Path) -> None:
+        """The classify step consumes identified_fruits.md and emits classified_fruits.md."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        classify_step = job.steps[1]
+        assert classify_step.id == "classify"
+        assert classify_step.name == "Classify Fruits"
+
+        # Exactly one input: a file handed over from the identify step
+        assert len(classify_step.inputs) == 1
+        assert classify_step.inputs[0].is_file_input()
+        assert classify_step.inputs[0].file == "identified_fruits.md"
+        assert classify_step.inputs[0].from_step == "identify"
+
+        # Sole declared output
+        assert classify_step.outputs == ["classified_fruits.md"]
+
+        # Declares identify as its only dependency
+        assert classify_step.dependencies == ["identify"]
+
+    def test_fruits_command_generation(self, fixtures_dir: Path, temp_dir: Path) -> None:
+        """Generating with the Claude adapter yields one command file per step."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        generator = CommandGenerator()
+        adapter = ClaudeAdapter()
+        commands_dir = temp_dir / ".claude"
+        commands_dir.mkdir()
+
+        command_paths = generator.generate_all_commands(job, adapter, commands_dir)
+
+        assert len(command_paths) == 2
+
+        # Files are expected under <commands_dir>/commands, named <job>.<step>.md
+        identify_cmd = commands_dir / "commands" / "fruits.identify.md"
+        classify_cmd = commands_dir / "commands" / "fruits.classify.md"
+        assert identify_cmd.exists()
+        assert classify_cmd.exists()
+
+    def test_fruits_identify_command_content(self, fixtures_dir: Path, temp_dir: Path) -> None:
+        """The identify command names its input, its output, and the follow-up command."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        generator = CommandGenerator()
+        adapter = ClaudeAdapter()
+        commands_dir = temp_dir / ".claude"
+        commands_dir.mkdir()
+
+        generator.generate_all_commands(job, adapter, commands_dir)
+
+        identify_cmd = commands_dir / "commands" / "fruits.identify.md"
+        content = identify_cmd.read_text()
+
+        # Command name appears after a `# ` heading marker
+        assert "# fruits.identify" in content
+
+        # Position within the two-step job
+        assert "Step 1 of 2" in content
+
+        # User-supplied input is referenced by name
+        assert "raw_items" in content
+
+        # Output file is referenced by name
+        assert "identified_fruits.md" in content
+
+        # Slash command for the next step (classify) is present
+        assert "/fruits.classify" in content
+
+    def test_fruits_classify_command_content(self, fixtures_dir: Path, temp_dir: Path) -> None:
+        """The classify command names its input file, source step, output, and completion."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        generator = CommandGenerator()
+        adapter = ClaudeAdapter()
+        commands_dir = temp_dir / ".claude"
+        commands_dir.mkdir()
+
+        generator.generate_all_commands(job, adapter, commands_dir)
+
+        classify_cmd = commands_dir / "commands" / "fruits.classify.md"
+        content = classify_cmd.read_text()
+
+        # Command name appears after a `# ` heading marker
+        assert "# fruits.classify" in content
+
+        # Position within the two-step job
+        assert "Step 2 of 2" in content
+
+        # Input file and the step it comes from are both referenced
+        assert "identified_fruits.md" in content
+        assert "from step `identify`" in content
+
+        # Output file is referenced by name
+        assert "classified_fruits.md" in content
+
+        # Last step, so the completion text is expected
+        assert "Workflow Complete" in content
+
+    def test_fruits_dependency_validation(self, fixtures_dir: Path) -> None:
+        """validate_dependencies() accepts the fruits job without raising."""
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        # No assertion needed: an exception raised here fails the test
+        job.validate_dependencies()
+
+    def test_fruits_job_is_deterministic_design(self, fixtures_dir: Path) -> None:
+        """Pin the shape that keeps the fruits job predictable in CI.
+
+        Two steps in a linear chain, one user input feeding the first step,
+        and one named markdown output per step.
+        """
+        job_dir = fixtures_dir / "jobs" / "fruits"
+        job = parse_job_definition(job_dir)
+
+        # Exactly two steps
+        assert len(job.steps) == 2
+
+        # Linear chain: identify has no dependencies, classify depends on identify
+        assert job.steps[0].dependencies == []
+        assert job.steps[1].dependencies == ["identify"]
+
+        # identify: its single input comes from the user
+        identify_step = job.steps[0]
+        assert len(identify_step.inputs) == 1
+        assert identify_step.inputs[0].is_user_input()
+
+        # classify: its single input is a file produced by identify
+        classify_step = job.steps[1]
+        assert len(classify_step.inputs) == 1
+        assert classify_step.inputs[0].is_file_input()
+        assert classify_step.inputs[0].from_step == "identify"
+
+        # One fixed markdown filename per step
+        assert identify_step.outputs == ["identified_fruits.md"]
+        assert classify_step.outputs == ["classified_fruits.md"]

From 8b2785e9c2009d0213d690d5a6c85251dc43bf2a Mon Sep 17 00:00:00 2001
From: Noah Horton <noah@unsupervised.com>
Date: Thu, 15 Jan 2026 15:50:35 -0700
Subject: [PATCH 8/8] Fix CI workflow: use deepwork install with --path, add
 concurrency rules

- Add concurrency rules to ensure only one instance runs per PR
- Fix test to use 'deepwork install --platform claude --path test_project'
- Create .claude directory before install for platform detection
- Run commands from repo root with --path flag instead of cd'ing
---
 .github/workflows/claude-code-test.yml | 39 ++++++++++++++++----------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml
index bafc2592..c60eb1b7 100644
--- a/.github/workflows/claude-code-test.yml
+++ b/.github/workflows/claude-code-test.yml
@@ -20,6 +20,11 @@ on:
   schedule:
     - cron: '0 6 * * 1'  # Weekly on Monday at 6 AM UTC
 
+# Ensure only one instance runs at a time per PR/branch
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true
+
 jobs:
   # Job 1: Validate command generation (always runs)
   validate-generation:
@@ -47,35 +52,37 @@ jobs:
         run: |
           # Create a test environment
           mkdir -p test_project/.deepwork/jobs
+          mkdir -p test_project/.claude  # Required for platform detection
           cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
 
-          # Install deepwork into test project
+          # Set up git repo in test project
           cd test_project
           git init
           git config user.email "test@test.com"
           git config user.name "Test"
           echo "# Test" > README.md
           git add . && git commit -m "init"
+          cd ..
 
-          # Run deepwork sync to generate commands
-          uv run --directory .. deepwork sync
+          # Run deepwork install to set up the project (this also runs sync)
+          uv run deepwork install --platform claude --path test_project
 
           # Validate generated commands exist
           echo "Checking generated commands..."
-          ls -la .claude/commands/
+          ls -la test_project/.claude/commands/
 
           # Verify command files exist
-          test -f .claude/commands/fruits.identify.md || (echo "Missing fruits.identify.md" && exit 1)
-          test -f .claude/commands/fruits.classify.md || (echo "Missing fruits.classify.md" && exit 1)
+          test -f test_project/.claude/commands/fruits.identify.md || (echo "Missing fruits.identify.md" && exit 1)
+          test -f test_project/.claude/commands/fruits.classify.md || (echo "Missing fruits.classify.md" && exit 1)
 
           # Verify command content
-          grep -q "# fruits.identify" .claude/commands/fruits.identify.md
-          grep -q "raw_items" .claude/commands/fruits.identify.md
-          grep -q "identified_fruits.md" .claude/commands/fruits.identify.md
+          grep -q "# fruits.identify" test_project/.claude/commands/fruits.identify.md
+          grep -q "raw_items" test_project/.claude/commands/fruits.identify.md
+          grep -q "identified_fruits.md" test_project/.claude/commands/fruits.identify.md
 
-          grep -q "# fruits.classify" .claude/commands/fruits.classify.md
-          grep -q "identified_fruits.md" .claude/commands/fruits.classify.md
-          grep -q "classified_fruits.md" .claude/commands/fruits.classify.md
+          grep -q "# fruits.classify" test_project/.claude/commands/fruits.classify.md
+          grep -q "identified_fruits.md" test_project/.claude/commands/fruits.classify.md
+          grep -q "classified_fruits.md" test_project/.claude/commands/fruits.classify.md
 
           echo "Command generation validated successfully!"
 
@@ -129,6 +136,7 @@ jobs:
         if: steps.check-key.outputs.has_key == 'true'
         run: |
           mkdir -p test_project/.deepwork/jobs
+          mkdir -p test_project/.claude  # Required for platform detection
           cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
 
           cd test_project
@@ -137,12 +145,13 @@ jobs:
           git config user.name "Test"
           echo "# CI Test Project" > README.md
           git add . && git commit -m "init"
+          cd ..
 
-          # Generate commands
-          uv run --directory .. deepwork sync
+          # Run deepwork install to set up the project (this also runs sync)
+          uv run deepwork install --platform claude --path test_project
 
           echo "Test project setup complete"
-          ls -la .claude/commands/
+          ls -la test_project/.claude/commands/
 
       - name: Run Claude Code - Identify Step
         if: steps.check-key.outputs.has_key == 'true'