Add template agent support to create-agent skill #708

Workflow file for this run

.github/workflows/claude-code-test.yml at e56e959

	name: Claude Code Integration Test

	# Events that start this workflow.
	on:
	  # Manual trigger for testing
	  workflow_dispatch:
	    inputs:
	      # NOTE(review): no step visible in this workflow reads inputs.debug;
	      # confirm the input is still needed.
	      debug:
	        description: 'Enable debug logging'
	        required: false
	        default: false
	        type: boolean
	  # Run on all PRs (shows as a check, but the real steps are skipped for
	  # pull_request events and only run in the merge queue or on manual dispatch)
	  pull_request:
	    branches: [main]
	  # Run in the merge queue to validate before merging
	  merge_group:
	    branches: [main]

	# Ensure only one instance runs at a time per PR/branch.
	# head_ref is only set for pull_request events; other events fall back to ref.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
	  cancel-in-progress: true

	# Minimal permissions for this workflow: read-only access to repository contents
	permissions:
	  contents: read

	jobs:
	  # Job 1: Validate skill generation from fixtures (no API key needed)
	  # Runs on all events, but actual work only happens in merge_group/workflow_dispatch
	  # This ensures the check name exists for PRs (needed for GitHub's merge queue)
	  validate-generation:
	    runs-on: ubuntu-latest
	    steps:
	      # For PRs: just pass quickly (actual tests run in merge queue)
	      - name: Skip on PR
	        if: github.event_name == 'pull_request'
	        run: echo "Validation will run in merge queue. Passing for PR."

	      - uses: actions/checkout@v4
	        if: github.event_name != 'pull_request'

	      - name: Install uv
	        if: github.event_name != 'pull_request'
	        uses: astral-sh/setup-uv@v4
	        with:
	          version: "latest"

	      - name: Set up Python
	        if: github.event_name != 'pull_request'
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.11"

	      - name: Install dependencies
	        if: github.event_name != 'pull_request'
	        run: uv sync --extra dev

	      - name: Validate fruits fixture parses and install generates correct structure
	        if: github.event_name != 'pull_request'
	        run: |
	          # Verify the fruits fixture parses correctly via deepwork's parser
	          uv run python -c "
	          from pathlib import Path
	          from deepwork.jobs.parser import parse_job_definition

	          job = parse_job_definition(Path('tests/fixtures/jobs/fruits'))

	          assert job.name == 'fruits'
	          assert job.version == '1.0.0'
	          assert len(job.steps) == 2
	          assert [s.id for s in job.steps] == ['identify', 'classify']

	          # Identify step: user input -> file output
	          identify = job.steps[0]
	          assert identify.inputs[0].is_user_input()
	          assert identify.inputs[0].name == 'raw_items'
	          assert identify.outputs[0].name == 'identified_fruits.md'
	          assert identify.dependencies == []

	          # Classify step: file input from identify -> file output
	          classify = job.steps[1]
	          assert classify.inputs[0].is_file_input()
	          assert classify.inputs[0].file == 'identified_fruits.md'
	          assert classify.inputs[0].from_step == 'identify'
	          assert classify.outputs[0].name == 'classified_fruits.md'
	          assert classify.dependencies == ['identify']

	          # Workflow definition
	          assert len(job.workflows) == 1
	          assert job.workflows[0].name == 'full'
	          assert job.workflows[0].steps == ['identify', 'classify']

	          # Validations pass
	          job.validate_dependencies()
	          job.validate_file_inputs()
	          job.validate_workflows()

	          print('All fruits fixture validations passed!')
	          "

	      - name: Validate plugin structure and skill content
	        if: github.event_name != 'pull_request'
	        run: |
	          # Verify the plugin provides the required files
	          echo "Checking plugin structure..."

	          test -f plugins/claude/.claude-plugin/plugin.json || (echo "Missing plugin.json" && exit 1)
	          test -f plugins/claude/skills/deepwork/SKILL.md || (echo "Missing SKILL.md" && exit 1)
	          test -f plugins/claude/.mcp.json || (echo "Missing .mcp.json" && exit 1)
	          test -f plugins/claude/hooks/hooks.json || (echo "Missing hooks.json" && exit 1)

	          # Verify the skill references deepwork
	          grep -qi "deepwork" plugins/claude/skills/deepwork/SKILL.md

	          # Verify the MCP config runs deepwork serve
	          grep -q "deepwork" plugins/claude/.mcp.json
	          grep -q "serve" plugins/claude/.mcp.json

	          echo "Plugin structure validated successfully!"

	  # Job 2: Full end-to-end test with Claude Code
	  # Tests the COMPLETE workflow: create the fruits job with /deepwork, run it,
	  # then validate the files it produced.
	  # Runs on all events, but actual work only happens in merge_group/workflow_dispatch
	  # This ensures the check name exists for PRs (needed for GitHub's merge queue)
	  claude-code-e2e:
	    runs-on: ubuntu-latest
	    needs: validate-generation
	    env:
	      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	    steps:
	      # For PRs: just pass quickly (actual tests run in merge queue)
	      - name: Skip on PR
	        if: github.event_name == 'pull_request'
	        run: echo "E2E tests will run in merge queue. Passing for PR."

	      - uses: actions/checkout@v4
	        if: github.event_name != 'pull_request'

	      # Every later step is gated on has_key. On pull_request this step is
	      # skipped, so has_key is never 'true' and those steps are skipped too.
	      - name: Check for API key
	        if: github.event_name != 'pull_request'
	        id: check-key
	        run: |
	          if [ -z "$ANTHROPIC_API_KEY" ]; then
	            echo "has_key=false" >> "$GITHUB_OUTPUT"
	            echo "::warning::ANTHROPIC_API_KEY not set, skipping Claude Code e2e test"
	          else
	            echo "has_key=true" >> "$GITHUB_OUTPUT"
	          fi

	      - name: Install Node.js (for Claude Code CLI)
	        if: steps.check-key.outputs.has_key == 'true'
	        uses: actions/setup-node@v4
	        with:
	          node-version: '20'

	      - name: Install Claude Code CLI
	        if: steps.check-key.outputs.has_key == 'true'
	        run: npm install -g @anthropic-ai/claude-code

	      - name: Install uv
	        if: steps.check-key.outputs.has_key == 'true'
	        uses: astral-sh/setup-uv@v4
	        with:
	          version: "latest"

	      - name: Set up Python
	        if: steps.check-key.outputs.has_key == 'true'
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.11"

	      - name: Install deepwork
	        if: steps.check-key.outputs.has_key == 'true'
	        run: |
	          uv sync

	          # Add the uv virtualenv bin directory to PATH for all subsequent steps.
	          #
	          # Why: The MCP config uses "deepwork serve --path ." to spawn the
	          # MCP server. Without this PATH addition, the bare "deepwork" command
	          # is not found (it only exists inside the uv virtualenv).
	          echo "$(pwd)/.venv/bin" >> "$GITHUB_PATH"

	      - name: Set up fresh test project
	        if: steps.check-key.outputs.has_key == 'true'
	        run: |
	          # Create a fresh project with NO pre-existing job definitions
	          mkdir -p test_project/.claude/skills/deepwork

	          cd test_project
	          git init
	          git config user.email "test@test.com"
	          git config user.name "Test"
	          echo "# CI Test Project - DeepWork E2E Test" > README.md
	          git add . && git commit -m "init"
	          cd ..

	          # Copy plugin skill into the test project (replaces old `deepwork install`)
	          cp plugins/claude/skills/deepwork/SKILL.md test_project/.claude/skills/deepwork/

	          # Write MCP config using bare `deepwork` command (CI has it on PATH
	          # via .venv/bin; the plugin's .mcp.json uses `uvx` which isn't available here)
	          python3 -c "
	          import json
	          mcp = {'mcpServers': {'deepwork': {
	              'command': 'deepwork',
	              'args': ['serve', '--path', '.', '--external-runner', 'claude']
	          }}}
	          with open('test_project/.mcp.json', 'w') as f:
	              json.dump(mcp, f, indent=2)
	          "

	          # Write settings.json with all required permissions
	          # NOTE(review): patterns such as 'Bash()' and 'Read(./)' look like they may
	          # have lost wildcard characters; confirm against the intended permission rules.
	          python3 -c "
	          import json
	          settings = {
	              'permissions': {
	                  'allow': [
	                      'Bash()', 'Read(./)', 'Edit(./)', 'Write(./)', 'Skill()',
	                      'mcp__deepwork__get_workflows', 'mcp__deepwork__start_workflow',
	                      'mcp__deepwork__finished_step', 'mcp__deepwork__abort_workflow'
	                  ]
	              }
	          }
	          with open('test_project/.claude/settings.json', 'w') as f:
	              json.dump(settings, f, indent=2)
	          "

	          echo "Fresh test project setup complete"
	          echo "Available skills:"
	          ls -la test_project/.claude/skills/

	      # STEP 1: Use /deepwork to CREATE the fruits job via MCP workflow
	      #
	      # This invokes Claude with the /deepwork skill, which uses MCP tools to
	      # walk through the deepwork_jobs/new_job workflow (define → implement →
	      # test → iterate). The workflow includes quality gates that spawn Claude
	      # subprocesses, so it needs a generous timeout.
	      - name: Create job with /deepwork
	        if: steps.check-key.outputs.has_key == 'true'
	        working-directory: test_project
	        timeout-minutes: 10
	        run: |
	          echo "=== Running /deepwork to create fruits job ==="
	          mkdir fruits

	          # Use --debug to capture detailed logs for diagnosing failures.
	          # The debug log is dumped in the failure handler below.
	          claude --print --debug --model claude-sonnet-4-5 <<'PROMPT_EOF'
	          /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits.

	          Here are the EXACT specifications.

	          Intent: A simple workflow that takes a list of mixed items, identifies which are fruits, then classifies them by category. Designed for CI testing.

	          Steps:
	          1. Step: identify
	          Name: Identify Fruits
	          Description: Filter a list of items to include only the fruits
	          CRITICAL: The output MUST be stored in `fruits/identified_fruits.md`.

	          2. Step: classify
	          Name: Classify Fruits
	          Description: Organize identified fruits into categories (citrus, tropical, berries, etc.).
	          CRITICAL: must put the classified fruit list in `./fruits/classified_fruits.md`.

	          Key Instructions:
	          - Do not ask questions - just make the job
	          - Rules are explicitly not desired. Tell the review agents that.
	          - Do not give long commentary of what you did - just make the job with no commentary.
	          - IMPORTANT: Once the job.yml and step instruction files have been created (i.e. after the "define" and "implement" steps are done), STOP. Do NOT continue into the "test" or "iterate" steps. Abort the workflow at that point. We only need the job definition files created, not the full workflow run.
	          PROMPT_EOF

	          # Verify the job.yml was created
	          echo "=== Checking job.yml was created ==="
	          if [ -f ".deepwork/jobs/fruits/job.yml" ]; then
	            echo "SUCCESS: job.yml created"
	            cat .deepwork/jobs/fruits/job.yml
	          else
	            echo "ERROR: job.yml was not created"
	            echo "Contents of .deepwork/jobs/:"
	            ls -la .deepwork/jobs/ || echo "No jobs directory"
	            exit 1
	          fi

	          # Verify step files were created
	          echo "=== Checking step files were created ==="
	          if [ -f ".deepwork/jobs/fruits/steps/identify.md" ] && [ -f ".deepwork/jobs/fruits/steps/classify.md" ]; then
	            echo "SUCCESS: Step instruction files created"
	            echo "--- identify.md ---"
	            cat .deepwork/jobs/fruits/steps/identify.md
	            echo ""
	            echo "--- classify.md ---"
	            cat .deepwork/jobs/fruits/steps/classify.md
	          else
	            echo "ERROR: Step files were not created"
	            ls -la .deepwork/jobs/fruits/steps/ || echo "No steps directory"
	            exit 1
	          fi

	          echo "=== Job creation complete ==="

	      # Dump Claude debug log if the job creation step failed or timed out.
	      # This captures MCP server communication, tool calls, and error details.
	      - name: Dump Claude debug log on failure
	        if: failure() && steps.check-key.outputs.has_key == 'true'
	        working-directory: test_project
	        run: |
	          echo "=== Claude debug log ==="
	          # Claude --debug writes to ~/.claude/debug.log
	          if [ -f "$HOME/.claude/debug.log" ]; then
	            echo "--- Last 200 lines of debug.log ---"
	            tail -200 "$HOME/.claude/debug.log"
	          else
	            echo "No debug.log found at ~/.claude/debug.log"
	            echo "Searching for debug logs..."
	            find "$HOME/.claude" -name "*.log" -type f 2>/dev/null || echo "No log files found"
	          fi
	          echo ""
	          echo "=== MCP server config ==="
	          cat .mcp.json 2>/dev/null || echo "No .mcp.json found"
	          echo ""
	          echo "=== Settings.json ==="
	          cat .claude/settings.json 2>/dev/null || echo "No settings.json found"
	          echo ""
	          echo "=== DeepWork session state ==="
	          ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp directory"
	          for f in .deepwork/tmp/session_*.json; do
	            # Use `if` instead of a trailing `[ -f ] && ...` list: when the glob
	            # matches nothing, the failed test would otherwise be the script's
	            # final status and make this diagnostic step itself fail.
	            if [ -f "$f" ]; then
	              echo "--- $f ---"
	              cat "$f"
	            fi
	          done

	      # STEP 2: Execute the fruits workflow via /deepwork MCP entry point
	      - name: Run Workflow
	        if: steps.check-key.outputs.has_key == 'true'
	        working-directory: test_project
	        timeout-minutes: 6
	        run: |
	          echo "=== Running fruits workflow with test input via /deepwork ==="

	          claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF'
	          /deepwork Run the fruits full workflow. Process the list to the file and don't give any extra commentary or text output.
	          raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle
	          PROMPT_EOF

	          echo "=== Workflow finished - looking for output file ==="

	          # Verify both outputs were created
	          if [ -f "fruits/identified_fruits.md" ]; then
	            echo "SUCCESS: identified_fruits.md created"
	            echo "--- Output ---"
	            cat fruits/identified_fruits.md
	          else
	            echo "ERROR: identified_fruits.md was not created"
	            exit 1
	          fi

	          if [ -f "fruits/classified_fruits.md" ]; then
	            echo "SUCCESS: classified_fruits.md created"
	            echo "--- Output ---"
	            cat fruits/classified_fruits.md
	          else
	            echo "ERROR: classified_fruits.md was not created"
	            exit 1
	          fi

	      # STEP 3: Validate the complete workflow output
	      - name: Validate Workflow Output
	        if: steps.check-key.outputs.has_key == 'true'
	        working-directory: test_project/fruits
	        run: |
	          echo "=== Validating complete workflow ==="

	          # Check identified_fruits.md contains expected fruits
	          echo "Checking identified_fruits.md..."
	          grep -qi "apple" identified_fruits.md || (echo "Missing: apple" && exit 1)
	          grep -qi "banana" identified_fruits.md || (echo "Missing: banana" && exit 1)
	          grep -qi "orange" identified_fruits.md || (echo "Missing: orange" && exit 1)
	          grep -qi "mango" identified_fruits.md || (echo "Missing: mango" && exit 1)
	          grep -qi "grape" identified_fruits.md || (echo "Missing: grape" && exit 1)
	          echo " ✓ All expected fruits found in identified_fruits.md"

	          # Check classified_fruits.md has expected structure.
	          # -E gives portable alternation: any one category keyword is enough.
	          echo "Checking classified_fruits.md..."
	          grep -Eqi "citrus|tropical|pome|berr" classified_fruits.md || (echo "Missing fruit categories" && exit 1)
	          echo " ✓ Fruit categories found in classified_fruits.md"

	          echo ""
	          echo "=========================================="
	          echo " ALL E2E TESTS PASSED SUCCESSFULLY!"
	          echo "=========================================="
	          echo ""
	          echo "Workflow tested: /deepwork fruits full - Executed full fruits workflow (identify + classify)"
	          echo ""

	      # Runs even when earlier steps failed so the generated files can be inspected.
	      - name: Upload test artifacts
	        if: steps.check-key.outputs.has_key == 'true' && always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: claude-code-e2e-outputs
	          path: |
	            test_project/.deepwork/jobs/fruits/
	            test_project/.claude/skills/deepwork/
	            test_project/fruits/identified_fruits.md
	            test_project/fruits/classified_fruits.md
	          # Two of the paths live under dot-directories, which upload-artifact
	          # skips unless hidden files are explicitly included.
	          include-hidden-files: true
	          retention-days: 7

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add template agent support to create-agent skill #708

Workflow file

Add template agent support to create-agent skill #708

Uh oh!

Workflow file for this run