Skip to content

feat: MCP Server Architecture for Checkpoint-Based Workflow Execution #628

feat: MCP Server Architecture for Checkpoint-Based Workflow Execution

feat: MCP Server Architecture for Checkpoint-Based Workflow Execution #628

name: Claude Code Integration Test

# Triggers. The jobs below run on every trigger, but their steps only do real
# work when the event is not `pull_request` (i.e. merge queue or manual runs).
on:
  # Manual trigger for testing.
  # NOTE(review): no step visible in this workflow reads the `debug` input.
  workflow_dispatch:
    inputs:
      debug:
        description: 'Enable debug logging'
        required: false
        # A real boolean, matching `type: boolean` (previously the string 'false').
        default: false
        type: boolean
  # Run on all PRs (shows as check, but steps skip unless in merge queue)
  pull_request:
    branches: [main]
  # Run in the merge queue to validate before merging
  merge_group:
    branches: [main]

# Ensure only one instance runs at a time per PR/branch; a newer run cancels
# the one already in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

# Minimal permissions for this workflow
permissions:
  contents: read
jobs:
  # Job 1: Validate skill generation from fixtures (no API key needed)
  # Runs on all events, but actual work only happens in merge_group/workflow_dispatch
  # This ensures the check name exists for PRs (needed for GitHub's merge queue)
  validate-generation:
    runs-on: ubuntu-latest
    steps:
      # For PRs: just pass quickly (actual tests run in merge queue)
      - name: Skip on PR
        if: github.event_name == 'pull_request'
        run: echo "Validation will run in merge queue. Passing for PR."

      # Every remaining step is guarded so it is skipped on pull_request events.
      - uses: actions/checkout@v4
        if: github.event_name != 'pull_request'

      - name: Install uv
        if: github.event_name != 'pull_request'
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        if: github.event_name != 'pull_request'
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        if: github.event_name != 'pull_request'
        run: uv sync --extra dev

      - name: Validate fruits fixture parses and install generates correct structure
        if: github.event_name != 'pull_request'
        # The Python source below must stay at the same indent as the `uv run`
        # line: YAML strips the common indent, and Python needs it at column 0.
        run: |
          # Verify the fruits fixture parses correctly via deepwork's parser
          uv run python -c "
          from pathlib import Path
          from deepwork.core.parser import parse_job_definition
          job = parse_job_definition(Path('tests/fixtures/jobs/fruits'))
          assert job.name == 'fruits'
          assert job.version == '1.0.0'
          assert len(job.steps) == 2
          assert [s.id for s in job.steps] == ['identify', 'classify']
          # Identify step: user input -> file output
          identify = job.steps[0]
          assert identify.inputs[0].is_user_input()
          assert identify.inputs[0].name == 'raw_items'
          assert identify.outputs[0].name == 'identified_fruits.md'
          assert identify.dependencies == []
          # Classify step: file input from identify -> file output
          classify = job.steps[1]
          assert classify.inputs[0].is_file_input()
          assert classify.inputs[0].file == 'identified_fruits.md'
          assert classify.inputs[0].from_step == 'identify'
          assert classify.outputs[0].name == 'classified_fruits.md'
          assert classify.dependencies == ['identify']
          # Workflow definition
          assert len(job.workflows) == 1
          assert job.workflows[0].name == 'full'
          assert job.workflows[0].steps == ['identify', 'classify']
          # Validations pass
          job.validate_dependencies()
          job.validate_file_inputs()
          job.validate_workflows()
          print('All fruits fixture validations passed!')
          "

      - name: Generate skills and validate structure
        if: github.event_name != 'pull_request'
        run: |
          # Create a test environment
          mkdir -p test_project/.deepwork/jobs
          mkdir -p test_project/.claude  # Required for platform detection
          cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/
          # Set up git repo in test project
          cd test_project
          git init
          git config user.email "test@test.com"
          git config user.name "Test"
          echo "# Test" > README.md
          git add . && git commit -m "init"
          cd ..
          # Run deepwork install to set up the project (this also runs sync)
          uv run deepwork install --platform claude --path test_project
          # Validate generated skills exist
          echo "Checking generated skills..."
          ls -la test_project/.claude/skills/
          # MCP variant: only the /deepwork entry point skill is generated
          # (per-step skills are no longer created; MCP server handles orchestration)
          # Use a brace group, not a subshell, so `exit 1` ends the script itself
          # instead of relying on the shell's errexit setting.
          test -f test_project/.claude/skills/deepwork/SKILL.md || { echo "Missing deepwork MCP entry point skill"; exit 1; }
          # Sanity check: the skill file mentions "deepwork" (case-insensitive).
          # NOTE(review): this pattern is too generic to prove the skill references
          # the MCP tools; tighten it once a stable marker in SKILL.md is confirmed.
          grep -qi "deepwork" test_project/.claude/skills/deepwork/SKILL.md
          echo "Skill generation validated successfully!"
# Job 2: Full end-to-end test with Claude Code
# Tests the COMPLETE workflow:
#   1. Create the fruits job from scratch with /deepwork, then re-sync skills
#   2. Run the fruits "full" workflow via /deepwork
#   3. Validate the two output files
# Runs on all events, but actual work only happens in merge_group/workflow_dispatch
# This ensures the check name exists for PRs (needed for GitHub's merge queue)
  claude-code-e2e:
    runs-on: ubuntu-latest
    needs: validate-generation
    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    steps:
      # For PRs: just pass quickly (actual tests run in merge queue)
      - name: Skip on PR
        if: github.event_name == 'pull_request'
        run: echo "E2E tests will run in merge queue. Passing for PR."

      - uses: actions/checkout@v4
        if: github.event_name != 'pull_request'

      # Gate for every later step: they all test `has_key == 'true'`. On a
      # pull_request this step is skipped, the output is never set, and so
      # the later steps are skipped as well.
      - name: Check for API key
        if: github.event_name != 'pull_request'
        id: check-key
        run: |
          if [ -z "$ANTHROPIC_API_KEY" ]; then
            echo "has_key=false" >> "$GITHUB_OUTPUT"
            echo "::warning::ANTHROPIC_API_KEY not set, skipping Claude Code e2e test"
          else
            echo "has_key=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Install Node.js (for Claude Code CLI)
        if: steps.check-key.outputs.has_key == 'true'
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Claude Code CLI
        if: steps.check-key.outputs.has_key == 'true'
        run: npm install -g @anthropic-ai/claude-code

      - name: Install uv
        if: steps.check-key.outputs.has_key == 'true'
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        if: steps.check-key.outputs.has_key == 'true'
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install deepwork
        if: steps.check-key.outputs.has_key == 'true'
        run: |
          uv sync
          # Add the uv virtualenv bin directory to PATH for all subsequent steps.
          #
          # Why: `deepwork install` registers an MCP server in .mcp.json with
          # the command "deepwork serve --path .". When Claude Code starts, it
          # reads .mcp.json and spawns the MCP server as a subprocess using the
          # bare "deepwork" command. Without this PATH addition, that subprocess
          # fails because "deepwork" only exists inside the uv virtualenv
          # (accessible via "uv run deepwork" but not as a bare command).
          #
          # Without the MCP server running, Claude cannot use the DeepWork MCP
          # tools (get_workflows, start_workflow, finished_step) and falls back
          # to ad-hoc file creation, which produces the wrong output format.
          echo "$(pwd)/.venv/bin" >> "$GITHUB_PATH"

      - name: Set up fresh test project
        if: steps.check-key.outputs.has_key == 'true'
        run: |
          # Create a fresh project with NO pre-existing job definitions
          mkdir -p test_project/.claude
          cd test_project
          git init
          git config user.email "test@test.com"
          git config user.name "Test"
          echo "# CI Test Project - DeepWork E2E Test" > README.md
          git add . && git commit -m "init"
          cd ..
          # Install deepwork (this sets up .deepwork/ with standard jobs only)
          uv run deepwork install --platform claude --path test_project
          # Create permissive settings.json to allow file operations in CI
          cat > test_project/.claude/settings.json << 'SETTINGS_EOF'
          {
            "permissions": {
              "allow": [
                "Bash(*)",
                "Read(./**)",
                "Edit(./**)",
                "Write(./**)",
                "Skill(*)"
              ]
            }
          }
          SETTINGS_EOF
          echo "Fresh test project setup complete"
          echo "Available skills:"
          ls -la test_project/.claude/skills/

      # STEP 1: Use /deepwork to CREATE the fruits job via MCP workflow
      - name: Create job with /deepwork
        if: steps.check-key.outputs.has_key == 'true'
        working-directory: test_project
        timeout-minutes: 6
        run: |
          echo "=== Running /deepwork to create fruits job ==="
          # -p so a pre-existing directory does not fail the step
          mkdir -p fruits
          # Provide detailed, deterministic instructions for creating the job
          claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF'
          /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits.
          Here are the EXACT specifications.
          Intent: A simple workflow that takes a list of mixed items, identifies which are fruits, then classifies them by category. Designed for CI testing.
          Steps:
          1. Step: identify
          Name: Identify Fruits
          Description: Filter a list of items to include only the fruits
          **CRITICAL**: The output MUST be stored in `fruits/identified_fruits.md`.
          2. Step: classify
          Name: Classify Fruits
          Description: Organize identified fruits into categories (citrus, tropical, berries, etc.).
          **CRITICAL**: must put the classified fruit list in `./fruits/classified_fruits.md`.
          **Key Instructions:**
          - Do not ask questions - just make the job
          - Rules are explicitly not desired. Tell the review agents that.
          - Do not give long commentary of what you did - just make the job with no commentary.
          PROMPT_EOF
          # Verify the job.yml was created
          echo "=== Checking job.yml was created ==="
          if [ -f ".deepwork/jobs/fruits/job.yml" ]; then
            echo "SUCCESS: job.yml created"
            cat .deepwork/jobs/fruits/job.yml
          else
            echo "ERROR: job.yml was not created"
            echo "Contents of .deepwork/jobs/:"
            ls -la .deepwork/jobs/ || echo "No jobs directory"
            exit 1
          fi
          # Verify step files were created
          echo "=== Checking step files were created ==="
          if [ -f ".deepwork/jobs/fruits/steps/identify.md" ] && [ -f ".deepwork/jobs/fruits/steps/classify.md" ]; then
            echo "SUCCESS: Step instruction files created"
            echo "--- identify.md ---"
            cat .deepwork/jobs/fruits/steps/identify.md
            echo ""
            echo "--- classify.md ---"
            cat .deepwork/jobs/fruits/steps/classify.md
          else
            echo "ERROR: Step files were not created"
            ls -la .deepwork/jobs/fruits/steps/ || echo "No steps directory"
            exit 1
          fi
          # Run sync to regenerate skills after new job was created
          echo "=== Running deepwork sync to regenerate skills ==="
          cd ..
          uv run deepwork sync --path test_project
          echo "=== Checking generated skills ==="
          ls -la test_project/.claude/skills/
          # MCP variant: only the /deepwork entry point skill is generated
          if [ -f "test_project/.claude/skills/deepwork/SKILL.md" ]; then
            echo "SUCCESS: /deepwork MCP entry point skill generated"
          else
            echo "ERROR: /deepwork skill was not generated"
            exit 1
          fi

      # STEP 2: Execute the fruits workflow via /deepwork MCP entry point
      - name: Run Workflow
        if: steps.check-key.outputs.has_key == 'true'
        working-directory: test_project
        timeout-minutes: 3
        run: |
          echo "=== Running fruits workflow with test input via /deepwork ==="
          claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF'
          /deepwork Run the fruits full workflow. Process the list to the file and don't give any extra commentary or text output.
          raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle
          PROMPT_EOF
          echo "=== Workflow finished - looking for output file ==="
          # Verify both outputs were created
          if [ -f "fruits/identified_fruits.md" ]; then
            echo "SUCCESS: identified_fruits.md created"
            echo "--- Output ---"
            cat fruits/identified_fruits.md
          else
            echo "ERROR: identified_fruits.md was not created"
            exit 1
          fi
          if [ -f "fruits/classified_fruits.md" ]; then
            echo "SUCCESS: classified_fruits.md created"
            echo "--- Output ---"
            cat fruits/classified_fruits.md
          else
            echo "ERROR: classified_fruits.md was not created"
            exit 1
          fi

      # STEP 3: Validate the complete workflow output
      - name: Validate Workflow Output
        if: steps.check-key.outputs.has_key == 'true'
        working-directory: test_project/fruits
        run: |
          echo "=== Validating complete workflow ==="
          # Check identified_fruits.md contains expected fruits
          # (the five fruits present in the raw_items list given to the workflow)
          echo "Checking identified_fruits.md..."
          for fruit in apple banana orange mango grape; do
            # Brace group so `exit 1` terminates the script, not just a subshell
            grep -qi "$fruit" identified_fruits.md || { echo "Missing: $fruit"; exit 1; }
          done
          echo "  ✓ All expected fruits found in identified_fruits.md"
          # Check classified_fruits.md has expected structure
          # (-E: portable alternation instead of the GNU-only BRE `\|`)
          echo "Checking classified_fruits.md..."
          grep -Eqi "citrus|tropical|pome|berr" classified_fruits.md || { echo "Missing fruit categories"; exit 1; }
          echo "  ✓ Fruit categories found in classified_fruits.md"
          echo ""
          echo "=========================================="
          echo "  ALL E2E TESTS PASSED SUCCESSFULLY!"
          echo "=========================================="
          echo ""
          echo "Workflow tested: /deepwork fruits full - Executed full fruits workflow (identify + classify)"
          echo ""

      # always() keeps this step eligible after an earlier step has failed, so
      # the generated job files and outputs can be inspected after a failure.
      - name: Upload test artifacts
        if: steps.check-key.outputs.has_key == 'true' && always()
        uses: actions/upload-artifact@v4
        with:
          name: claude-code-e2e-outputs
          path: |
            test_project/.deepwork/jobs/fruits/
            test_project/.claude/skills/deepwork/
            test_project/fruits/identified_fruits.md
            test_project/fruits/classified_fruits.md
          retention-days: 7