Unsupervisedcom · nhorton · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml
@@ -0,0 +1,353 @@
+name: Claude Code Integration Test
+
+on:
+  # Manual trigger for testing
+  workflow_dispatch:
+    inputs:
+      # NOTE(review): no step in this workflow reads `inputs.debug` yet.
+      debug:
+        description: 'Enable debug logging'
+        required: false
+        # A boolean input takes a real boolean default, not the string 'false'.
+        default: false
+        type: boolean
+  # Run on PRs that modify core code
+  pull_request:
+    # `**` also matches base branches whose names contain `/` (for example
+    # `release/1.0`); a lone `*` stops at the first slash.
+    branches: ["**"]
+    paths:
+      - 'src/deepwork/**'
+      - 'tests/**'
+      - '.github/workflows/claude-code-test.yml'
+  # Scheduled run for continuous validation
+  schedule:
+    - cron: '0 6 * * 1'  # Weekly on Monday at 6 AM UTC
+
+# Serialize runs per PR head branch (or per ref for non-PR events); a newer
+# run in the same group cancels the one still in flight.
+concurrency:
+  cancel-in-progress: true
+  group: "${{ github.workflow }}-${{ github.head_ref || github.ref }}"
+
+jobs:
+  # Job 1: Validate command generation from fixtures (always runs, no API key needed)
+  validate-generation:
+    runs-on: ubuntu-latest
+    # Least privilege: nothing in this job writes back to the repository, so
+    # the GITHUB_TOKEN is restricted to read-only contents access (enough for
+    # checkout).
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # The `dev` extra is requested because the next step runs pytest
+      - name: Install dependencies
+        run: uv sync --extra dev
+
+      - name: Run fruits workflow tests
+        run: uv run pytest tests/integration/test_fruits_workflow.py -v
+
+      - name: Generate commands and validate structure
+        run: |
+          # assert_contains FILE TEXT
+          # Fails the step with a message unless FILE contains TEXT verbatim.
+          # -F keeps the dots in names like "fruits.identify" literal instead
+          # of letting them act as regex wildcards.
+          assert_contains() {
+            grep -qF -- "$2" "$1" || { echo "Missing '$2' in $1"; exit 1; }
+          }
+
+          # Create a test environment
+          mkdir -p test_project/.deepwork/jobs
+          mkdir -p test_project/.claude  # Required for platform detection
+          cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
+
+          # Set up git repo in test project
+          cd test_project
+          git init
+          git config user.email "test@test.com"
+          git config user.name "Test"
+          echo "# Test" > README.md
+          git add . && git commit -m "init"
+          cd ..
+
+          # Run deepwork install to set up the project (this also runs sync)
+          uv run deepwork install --platform claude --path test_project
+
+          # Validate generated commands exist
+          echo "Checking generated commands..."
+          ls -la test_project/.claude/commands/
+
+          # Verify command files exist
+          test -f test_project/.claude/commands/fruits.identify.md || { echo "Missing fruits.identify.md"; exit 1; }
+          test -f test_project/.claude/commands/fruits.classify.md || { echo "Missing fruits.classify.md"; exit 1; }
+
+          # Verify command content: each command names itself and references
+          # its declared input and output
+          assert_contains test_project/.claude/commands/fruits.identify.md "# fruits.identify"
+          assert_contains test_project/.claude/commands/fruits.identify.md "raw_items"
+          assert_contains test_project/.claude/commands/fruits.identify.md "identified_fruits.md"
+
+          assert_contains test_project/.claude/commands/fruits.classify.md "# fruits.classify"
+          assert_contains test_project/.claude/commands/fruits.classify.md "identified_fruits.md"
+          assert_contains test_project/.claude/commands/fruits.classify.md "classified_fruits.md"
+
+          echo "Command generation validated successfully!"
+
+  # Job 2: Full end-to-end test with Claude Code
+  # Tests the COMPLETE workflow: define job -> implement -> execute
+  claude-code-e2e:
+    runs-on: ubuntu-latest
+    needs: validate-generation
+    # Only manual and scheduled runs reach this job; pull_request runs stop
+    # after validate-generation.
+    if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
+    # Least privilege: this job holds an API secret and lets an agent run
+    # commands unattended, so the GITHUB_TOKEN is limited to read-only
+    # contents access (enough for checkout).
+    permissions:
+      contents: read
+    env:
+      # Empty when the secret is not configured; the first step after
+      # checkout tests for that.
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+    steps:
+      - uses: actions/checkout@v4
+
+      # Publishes `has_key` so every later step can skip itself (instead of
+      # failing) when the API secret is absent.
+      - name: Check for API key
+        id: check-key
+        run: |
+          # $GITHUB_OUTPUT is quoted so the redirection target is never
+          # word-split, whatever path the runner assigns.
+          if [ -z "$ANTHROPIC_API_KEY" ]; then
+            echo "has_key=false" >> "$GITHUB_OUTPUT"
+            echo "::warning::ANTHROPIC_API_KEY not set, skipping Claude Code e2e test"
+          else
+            echo "has_key=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Toolchain setup. Every step below is gated on the API key check so the
+      # job does no work when the secret is missing.
+      - name: Install Node.js (for Claude Code CLI)
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      # Later steps invoke the `claude` executable that this package provides
+      - name: Install Claude Code CLI
+        if: steps.check-key.outputs.has_key == 'true'
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Install uv
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: 'latest'
+
+      - name: Set up Python
+        if: steps.check-key.outputs.has_key == 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      # Sync the project environment so `uv run deepwork` works in later steps
+      - name: Install deepwork
+        if: steps.check-key.outputs.has_key == 'true'
+        run: uv sync
+
+      - name: Set up fresh test project
+        if: steps.check-key.outputs.has_key == 'true'
+        run: |
+          # Start from an empty project: no job definitions exist yet, only
+          # the .claude directory
+          mkdir -p test_project/.claude
+
+          # Seed a one-commit git repository without leaving the workspace root
+          git -C test_project init
+          git -C test_project config user.email "test@test.com"
+          git -C test_project config user.name "Test"
+          echo "# CI Test Project - DeepWork E2E Test" > test_project/README.md
+          git -C test_project add . && git -C test_project commit -m "init"
+
+          # Install deepwork (this sets up .deepwork/ with standard jobs only)
+          uv run deepwork install --platform claude --path test_project
+
+          # List the generated commands; ls fails the step if none were created
+          printf '%s\n' "Fresh test project setup complete" "Available commands:"
+          ls -la test_project/.claude/commands/
+
+      # STEP 1: Use /deepwork_jobs.define to CREATE the fruits job
+      - name: Create job with /deepwork_jobs.define
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 10
+        run: |
+          echo "=== Running /deepwork_jobs.define to create fruits job ==="
+
+          # Provide detailed, deterministic instructions for creating the job.
+          # --print runs one non-interactive turn and the heredoc is passed on
+          # stdin. --dangerously-skip-permissions replaces `--yes`, which is
+          # not a Claude Code CLI option: nobody can answer permission prompts
+          # in CI, and the agent must be able to write job.yml. The runner is
+          # a throwaway VM, which is the situation that flag is meant for.
+          claude --dangerously-skip-permissions --print "/deepwork_jobs.define" <<'PROMPT_EOF'
+          I want to create a simple job called "fruits" for identifying and classifying fruits.
+
+          Here are the EXACT specifications - please create the job.yml with these exact details:
+
+          Job name: fruits
+          Version: 1.0.0
+          Summary: Identify and classify fruits from a mixed list of items
+
+          Description: A simple workflow that takes a list of mixed items, identifies which are fruits, then classifies them by category. Designed for CI testing.
+
+          Steps:
+          1. Step ID: identify
+             Name: Identify Fruits
+             Description: Filter a list of items to identify only the fruits
+             Input: raw_items (user parameter) - A comma-separated list of items
+             Output: identified_fruits.md
+             Dependencies: none
+
+          2. Step ID: classify
+             Name: Classify Fruits
+             Description: Organize identified fruits into categories (citrus, tropical, berries, etc.)
+             Input: identified_fruits.md (file from step identify)
+             Output: classified_fruits.md
+             Dependencies: identify
+
+          Please create this job definition now. Do not ask questions - use these exact specifications.
+          PROMPT_EOF
+
+          # Verify the job.yml was created; on failure list the jobs directory
+          # to show what the agent produced instead
+          echo "=== Checking job.yml was created ==="
+          if [ -f ".deepwork/jobs/fruits/job.yml" ]; then
+            echo "SUCCESS: job.yml created"
+            cat .deepwork/jobs/fruits/job.yml
+          else
+            echo "ERROR: job.yml was not created"
+            echo "Contents of .deepwork/jobs/:"
+            ls -la .deepwork/jobs/ || echo "No jobs directory"
+            exit 1
+          fi
+
+      # STEP 2: Use /deepwork_jobs.implement to generate step instructions
+      - name: Generate step instructions with /deepwork_jobs.implement
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 10
+        run: |
+          echo "=== Running /deepwork_jobs.implement to generate step instructions ==="
+
+          # --dangerously-skip-permissions replaces `--yes`, which is not a
+          # Claude Code CLI option: nobody can answer permission prompts in CI,
+          # and the agent must be able to write the step files. The runner is
+          # a throwaway VM, which is the situation that flag is meant for.
+          claude --dangerously-skip-permissions --print "/deepwork_jobs.implement" <<'PROMPT_EOF'
+          Please implement the "fruits" job that was just defined.
+
+          For the identify step, create instructions that:
+          - Parse the comma-separated raw_items input
+          - Identify which items are fruits (apple, banana, orange, mango, grape, etc.)
+          - Output a markdown file listing the identified fruits
+
+          For the classify step, create instructions that:
+          - Read identified_fruits.md from the previous step
+          - Classify fruits into categories: Citrus (orange, lemon), Tropical (banana, mango), Pome (apple, pear), Berries, etc.
+          - Output a markdown file with fruits organized by category
+
+          Generate the step instruction files now.
+          PROMPT_EOF
+
+          # Verify step files were created
+          echo "=== Checking step files were created ==="
+          if [ -f ".deepwork/jobs/fruits/steps/identify.md" ] && [ -f ".deepwork/jobs/fruits/steps/classify.md" ]; then
+            echo "SUCCESS: Step instruction files created"
+            echo "--- identify.md ---"
+            cat .deepwork/jobs/fruits/steps/identify.md
+            echo ""
+            echo "--- classify.md ---"
+            cat .deepwork/jobs/fruits/steps/classify.md
+          else
+            echo "ERROR: Step files were not created"
+            ls -la .deepwork/jobs/fruits/steps/ || echo "No steps directory"
+            exit 1
+          fi
+
+          # Run sync to generate the slash commands. The step starts inside
+          # test_project (working-directory), so go up one level first and
+          # address the project through --path.
+          echo "=== Running deepwork sync to generate commands ==="
+          cd ..
+          uv run deepwork sync --path test_project
+
+          echo "=== Checking generated commands ==="
+          ls -la test_project/.claude/commands/
+
+          if [ -f "test_project/.claude/commands/fruits.identify.md" ] && [ -f "test_project/.claude/commands/fruits.classify.md" ]; then
+            echo "SUCCESS: Slash commands generated"
+          else
+            echo "ERROR: Slash commands were not generated"
+            exit 1
+          fi
+
+      # STEP 3: Execute the generated /fruits.identify command
+      - name: Run /fruits.identify
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 5
+        run: |
+          echo "=== Running /fruits.identify with test input ==="
+
+          # The input mixes five fruits with five non-fruits. The flag
+          # --dangerously-skip-permissions replaces `--yes`, which is not a
+          # Claude Code CLI option: the agent must be able to write its output
+          # file with nobody present to approve the tool call.
+          claude --dangerously-skip-permissions --print "/fruits.identify" <<'PROMPT_EOF'
+          raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle
+          PROMPT_EOF
+
+          # Verify output was created
+          if [ -f "identified_fruits.md" ]; then
+            echo "SUCCESS: identified_fruits.md created"
+            echo "--- Output ---"
+            cat identified_fruits.md
+          else
+            echo "ERROR: identified_fruits.md was not created"
+            exit 1
+          fi
+
+      # STEP 4: Execute the generated /fruits.classify command
+      - name: Run /fruits.classify
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        timeout-minutes: 5
+        run: |
+          echo "=== Running /fruits.classify ==="
+
+          # No stdin prompt is passed here, unlike the identify step.
+          # --dangerously-skip-permissions replaces `--yes`, which is not a
+          # Claude Code CLI option: the agent must be able to write its output
+          # file with nobody present to approve the tool call.
+          claude --dangerously-skip-permissions --print "/fruits.classify"
+
+          # Verify output was created
+          if [ -f "classified_fruits.md" ]; then
+            echo "SUCCESS: classified_fruits.md created"
+            echo "--- Output ---"
+            cat classified_fruits.md
+          else
+            echo "ERROR: classified_fruits.md was not created"
+            exit 1
+          fi
+
+      # STEP 5: Validate the complete workflow output
+      - name: Validate complete workflow
+        if: steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        run: |
+          echo "=== Validating complete workflow ==="
+
+          # Every fruit from the identify input must appear in its output
+          # (case-insensitive); stop at the first one that is absent
+          echo "Checking identified_fruits.md..."
+          for fruit in apple banana orange mango grape; do
+            grep -qi "$fruit" identified_fruits.md || { echo "Missing: $fruit"; exit 1; }
+          done
+          echo "  ✓ All expected fruits found in identified_fruits.md"
+
+          # One recognizable category name is enough to accept the classification
+          echo "Checking classified_fruits.md..."
+          grep -qiE "citrus|tropical|pome|berr" classified_fruits.md || { echo "Missing fruit categories"; exit 1; }
+          echo "  ✓ Fruit categories found in classified_fruits.md"
+
+          # The job definition and both step instruction files must still exist
+          echo "Checking job structure..."
+          for required in job.yml steps/identify.md steps/classify.md; do
+            test -f ".deepwork/jobs/fruits/$required" || { echo "Missing $(basename "$required")"; exit 1; }
+          done
+          echo "  ✓ Job structure is complete"
+
+          # Closing summary banner
+          cat <<'SUMMARY_EOF'
+
+          ==========================================
+            ALL E2E TESTS PASSED SUCCESSFULLY!
+          ==========================================
+
+          Workflow tested:
+            1. /deepwork_jobs.define - Created job specification
+            2. /deepwork_jobs.implement - Generated step instructions
+            3. /fruits.identify - Executed identify step
+            4. /fruits.classify - Executed classify step
+
+          SUMMARY_EOF
+
+      # Runs even when an earlier step failed (always()), so the generated
+      # files can be inspected after a broken run.
+      - name: Upload test artifacts
+        if: steps.check-key.outputs.has_key == 'true' && always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: claude-code-e2e-outputs
+          # The job definition and the commands live under dot-directories
+          # (.deepwork, .claude). upload-artifact v4 skips hidden files and
+          # directories by default, so they must be included explicitly.
+          include-hidden-files: true
+          path: |
+            test_project/.deepwork/jobs/fruits/
+            test_project/.claude/commands/fruits.*.md
+            test_project/identified_fruits.md
+            test_project/classified_fruits.md
+          retention-days: 7