diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml new file mode 100644 index 00000000..c60eb1b7 --- /dev/null +++ b/.github/workflows/claude-code-test.yml @@ -0,0 +1,222 @@ +name: Claude Code Integration Test + +on: + # Manual trigger for testing + workflow_dispatch: + inputs: + debug: + description: 'Enable debug logging' + required: false + default: false + type: boolean + # Run on PRs into any base branch ('**' also matches names containing '/') that modify core code + pull_request: + branches: ["**"] + paths: + - 'src/deepwork/**' + - 'tests/**' + - '.github/workflows/claude-code-test.yml' + # Scheduled run for continuous validation + schedule: + - cron: '0 6 * * 1' # Weekly on Monday at 6 AM UTC + +# Ensure only one instance runs at a time per PR/branch +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +jobs: + # Job 1: Validate command generation (always runs) + validate-generation: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: uv sync --extra dev + + - name: Run fruits workflow tests + run: uv run pytest tests/integration/test_fruits_workflow.py -v + + - name: Generate commands and validate structure + run: | + # Create a test environment + mkdir -p test_project/.deepwork/jobs + mkdir -p test_project/.claude # Required for platform detection + cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/ + + # Set up git repo in test project + cd test_project + git init + git config user.email "test@test.com" + git config user.name "Test" + echo "# Test" > README.md + git add . && git commit -m "init" + cd ..
+ + # Run deepwork install to set up the project (this also runs sync) + uv run deepwork install --platform claude --path test_project + + # Validate generated commands exist + echo "Checking generated commands..." + ls -la test_project/.claude/commands/ + + # Verify command files exist + test -f test_project/.claude/commands/fruits.identify.md || (echo "Missing fruits.identify.md" && exit 1) + test -f test_project/.claude/commands/fruits.classify.md || (echo "Missing fruits.classify.md" && exit 1) + + # Verify command content + grep -q "# fruits.identify" test_project/.claude/commands/fruits.identify.md + grep -q "raw_items" test_project/.claude/commands/fruits.identify.md + grep -q "identified_fruits.md" test_project/.claude/commands/fruits.identify.md + + grep -q "# fruits.classify" test_project/.claude/commands/fruits.classify.md + grep -q "identified_fruits.md" test_project/.claude/commands/fruits.classify.md + grep -q "classified_fruits.md" test_project/.claude/commands/fruits.classify.md + + echo "Command generation validated successfully!" 
+ + # Job 2: End-to-end test with Claude Code (manual/scheduled runs only; steps are skipped when no API key is set) + claude-code-e2e: + runs-on: ubuntu-latest + needs: validate-generation + if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + steps: + - uses: actions/checkout@v4 + + - name: Check for API key + id: check-key + run: | + if [ -z "$ANTHROPIC_API_KEY" ]; then + echo "has_key=false" >> "$GITHUB_OUTPUT" + echo "::warning::ANTHROPIC_API_KEY not set, skipping Claude Code e2e test" + else + echo "has_key=true" >> "$GITHUB_OUTPUT" + fi + + - name: Install Node.js (for Claude Code CLI) + if: steps.check-key.outputs.has_key == 'true' + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Claude Code CLI + if: steps.check-key.outputs.has_key == 'true' + run: npm install -g @anthropic-ai/claude-code + + - name: Install uv + if: steps.check-key.outputs.has_key == 'true' + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Set up Python + if: steps.check-key.outputs.has_key == 'true' + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install deepwork + if: steps.check-key.outputs.has_key == 'true' + run: uv sync + + - name: Set up test project + if: steps.check-key.outputs.has_key == 'true' + run: | + mkdir -p test_project/.deepwork/jobs + mkdir -p test_project/.claude # Required for platform detection + cp -r tests/fixtures/jobs/fruits test_project/.deepwork/jobs/ + + cd test_project + git init + git config user.email "test@test.com" + git config user.name "Test" + echo "# CI Test Project" > README.md + git add . && git commit -m "init" + cd ..
+ + # Run deepwork install to set up the project (this also runs sync) + uv run deepwork install --platform claude --path test_project + + echo "Test project setup complete" + ls -la test_project/.claude/commands/ + + - name: Run Claude Code - Identify Step + if: steps.check-key.outputs.has_key == 'true' + working-directory: test_project + timeout-minutes: 5 + run: | + # Run the identify step with a deterministic input + # Using --print to output result, --yes to auto-accept + claude --yes --print "/fruits.identify" < | | | - ---- - -## How to Sign - -When you submit your first pull request, the CLA Assistant bot will guide you through signing the CLA electronically by commenting on your PR. - ---- - -## Corporate Contributors - -Organizations that have signed the Corporate CLA are tracked separately. If you are contributing on behalf of your employer, please ensure your organization has signed the Corporate CLA by contacting legal@unsupervised.com. - ---- - -For questions about the CLA, see [CLA.md](CLA.md) or contact legal@unsupervised.com. +{ + "signedContributors": [ + { + "name": "nhorton", + "id": 204146, + "comment_id": 3752380523, + "created_at": "2026-01-15T00:57:16Z", + "repoId": 1132406094, + "pullRequestNo": 27 + }, + { + "name": "tylerwillis", + "id": 50716, + "comment_id": 3753520846, + "created_at": "2026-01-15T08:27:44Z", + "repoId": 1132406094, + "pullRequestNo": 31 + } + ] +} \ No newline at end of file diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..7adc5273 --- /dev/null +++ b/tests/e2e/__init__.py @@ -0,0 +1 @@ +"""End-to-end tests for DeepWork with Claude Code.""" diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py new file mode 100644 index 00000000..0f75fac5 --- /dev/null +++ b/tests/e2e/test_claude_code_integration.py @@ -0,0 +1,325 @@ +"""End-to-end tests for DeepWork with Claude Code integration. 
+ +These tests validate that DeepWork-generated commands work correctly +with Claude Code. The tests can run in two modes: + +1. **Generation-only mode** (default): Tests command generation and structure +2. **Full e2e mode**: Actually executes commands with Claude Code + +Set ANTHROPIC_API_KEY and DEEPWORK_E2E_FULL=true to run full e2e tests. +""" + +import os +import shutil +import subprocess +import tempfile +from pathlib import Path + +import pytest + +from deepwork.core.adapters import ClaudeAdapter +from deepwork.core.generator import CommandGenerator +from deepwork.core.parser import parse_job_definition + +# Test input for deterministic validation +TEST_INPUT = "apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle" + +# Expected fruits from test input (for validation) +EXPECTED_FRUITS = {"apple", "banana", "orange", "mango", "grape"} + + +def has_claude_code() -> bool: + """Check if Claude Code CLI is available.""" + try: + result = subprocess.run( + ["claude", "--version"], + capture_output=True, + timeout=10, + ) + return result.returncode == 0 + except (FileNotFoundError, subprocess.TimeoutExpired): + return False + + +def has_api_key() -> bool: + """Check if Anthropic API key is set.""" + return bool(os.environ.get("ANTHROPIC_API_KEY")) + + +def run_full_e2e() -> bool: + """Check if full e2e tests should run.""" + return ( + os.environ.get("DEEPWORK_E2E_FULL", "").lower() == "true" + and has_api_key() + and has_claude_code() + ) + + +class TestCommandGenerationE2E: + """End-to-end tests for command generation.""" + + def test_generate_fruits_commands_in_temp_project(self) -> None: + """Test generating fruits commands in a realistic project structure.""" + with tempfile.TemporaryDirectory() as tmpdir: + project_dir = Path(tmpdir) + + # Set up project structure + deepwork_dir = project_dir / ".deepwork" / "jobs" + deepwork_dir.mkdir(parents=True) + + # Copy fruits job fixture + fixtures_dir = Path(__file__).parent.parent / "fixtures" / 
"jobs" / "fruits" + shutil.copytree(fixtures_dir, deepwork_dir / "fruits") + + # Initialize git repo (required for some operations) + subprocess.run(["git", "init"], cwd=project_dir, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=project_dir, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=project_dir, + capture_output=True, + ) + + # Parse job and generate commands + job = parse_job_definition(deepwork_dir / "fruits") + generator = CommandGenerator() + adapter = ClaudeAdapter() + + commands_dir = project_dir / ".claude" + commands_dir.mkdir() + + command_paths = generator.generate_all_commands(job, adapter, commands_dir) + + # Validate commands were generated + assert len(command_paths) == 2 + + identify_cmd = commands_dir / "commands" / "fruits.identify.md" + classify_cmd = commands_dir / "commands" / "fruits.classify.md" + + assert identify_cmd.exists() + assert classify_cmd.exists() + + # Validate command content + identify_content = identify_cmd.read_text() + assert "# fruits.identify" in identify_content + assert "raw_items" in identify_content + assert "identified_fruits.md" in identify_content + + classify_content = classify_cmd.read_text() + assert "# fruits.classify" in classify_content + assert "identified_fruits.md" in classify_content + assert "classified_fruits.md" in classify_content + + def test_command_structure_matches_claude_code_expectations(self) -> None: + """Test that generated commands have the structure Claude Code expects.""" + fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" + job = parse_job_definition(fixtures_dir) + + with tempfile.TemporaryDirectory() as tmpdir: + commands_dir = Path(tmpdir) / ".claude" + commands_dir.mkdir() + + generator = CommandGenerator() + adapter = ClaudeAdapter() + generator.generate_all_commands(job, adapter, commands_dir) + + identify_cmd = commands_dir / "commands" / 
"fruits.identify.md" + content = identify_cmd.read_text() + + # Claude Code expects specific sections + assert "# fruits.identify" in content # Command name header + assert "## Instructions" in content # Instructions section + assert "## Inputs" in content # Inputs section + assert "## Output" in content # Output section + + # Check for user input prompt + assert "raw_items" in content + + def test_dependency_chain_in_commands(self) -> None: + """Test that dependency chain is correctly represented in commands.""" + fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" + job = parse_job_definition(fixtures_dir) + + with tempfile.TemporaryDirectory() as tmpdir: + commands_dir = Path(tmpdir) / ".claude" + commands_dir.mkdir() + + generator = CommandGenerator() + adapter = ClaudeAdapter() + generator.generate_all_commands(job, adapter, commands_dir) + + # First step should have no prerequisites + identify_cmd = commands_dir / "commands" / "fruits.identify.md" + identify_content = identify_cmd.read_text() + assert "## Prerequisites" not in identify_content + + # Second step should reference first step + classify_cmd = commands_dir / "commands" / "fruits.classify.md" + classify_content = classify_cmd.read_text() + assert "## Prerequisites" in classify_content + assert "identify" in classify_content.lower() + + +@pytest.mark.skipif( + not run_full_e2e(), + reason="Full e2e requires ANTHROPIC_API_KEY, DEEPWORK_E2E_FULL=true, and claude CLI", +) +class TestClaudeCodeExecution: + """End-to-end tests that actually execute with Claude Code. 
+ + These tests only run when: + - ANTHROPIC_API_KEY is set + - DEEPWORK_E2E_FULL=true + - Claude Code CLI is installed + """ + + @pytest.fixture + def project_with_commands(self) -> Path: + """Create a test project with generated commands.""" + tmpdir = tempfile.mkdtemp() + project_dir = Path(tmpdir) + + # Set up project structure + deepwork_dir = project_dir / ".deepwork" / "jobs" + deepwork_dir.mkdir(parents=True) + + # Copy fruits job fixture + fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" + shutil.copytree(fixtures_dir, deepwork_dir / "fruits") + + # Initialize git repo + subprocess.run(["git", "init"], cwd=project_dir, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=project_dir, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=project_dir, + capture_output=True, + ) + + # Create README + (project_dir / "README.md").write_text("# Test Project\n") + subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "init"], + cwd=project_dir, + capture_output=True, + ) + + # Generate commands + job = parse_job_definition(deepwork_dir / "fruits") + generator = CommandGenerator() + adapter = ClaudeAdapter() + + commands_dir = project_dir / ".claude" + commands_dir.mkdir() + generator.generate_all_commands(job, adapter, commands_dir) + + yield project_dir + + # Cleanup + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_identify_step_execution(self, project_with_commands: Path) -> None: + """Test executing the identify step with Claude Code.""" + # Run Claude Code with the identify command + result = subprocess.run( + [ + "claude", + "--yes", + "--print", + f"/fruits.identify raw_items: {TEST_INPUT}", + ], + cwd=project_with_commands, + capture_output=True, + text=True, + timeout=120, + ) + + assert result.returncode == 0, f"Claude Code failed: {result.stderr}" + + # Check output file 
was created + output_file = project_with_commands / "identified_fruits.md" + assert output_file.exists(), "identified_fruits.md was not created" + + # Validate content + content = output_file.read_text().lower() + for fruit in EXPECTED_FRUITS: + assert fruit in content, f"Expected fruit '{fruit}' not found in output" + + def test_classify_step_execution(self, project_with_commands: Path) -> None: + """Test executing the classify step with Claude Code.""" + # First, create the input file (simulate identify step output) + identify_output = project_with_commands / "identified_fruits.md" + identify_output.write_text( + "# Identified Fruits\n\n- apple\n- banana\n- orange\n- mango\n- grape\n" + ) + + # Run Claude Code with the classify command + result = subprocess.run( + ["claude", "--yes", "--print", "/fruits.classify"], + cwd=project_with_commands, + capture_output=True, + text=True, + timeout=120, + ) + + assert result.returncode == 0, f"Claude Code failed: {result.stderr}" + + # Check output file was created + output_file = project_with_commands / "classified_fruits.md" + assert output_file.exists(), "classified_fruits.md was not created" + + # Validate content has category structure + content = output_file.read_text().lower() + # Should have at least one category mentioned + categories = ["citrus", "tropical", "pome", "berries", "grape"] + has_category = any(cat in content for cat in categories) + assert has_category, f"No fruit categories found in output: {content[:500]}" + + def test_full_workflow_execution(self, project_with_commands: Path) -> None: + """Test executing the complete fruits workflow with Claude Code.""" + # Run identify step + result1 = subprocess.run( + [ + "claude", + "--yes", + "--print", + f"/fruits.identify raw_items: {TEST_INPUT}", + ], + cwd=project_with_commands, + capture_output=True, + text=True, + timeout=120, + ) + assert result1.returncode == 0, f"Identify step failed: {result1.stderr}" + + # Verify identify output exists + 
identify_output = project_with_commands / "identified_fruits.md" + assert identify_output.exists(), "Identify step did not create output" + + # Run classify step + result2 = subprocess.run( + ["claude", "--yes", "--print", "/fruits.classify"], + cwd=project_with_commands, + capture_output=True, + text=True, + timeout=120, + ) + assert result2.returncode == 0, f"Classify step failed: {result2.stderr}" + + # Verify classify output exists + classify_output = project_with_commands / "classified_fruits.md" + assert classify_output.exists(), "Classify step did not create output" + + # Validate final output quality + content = classify_output.read_text() + assert len(content) > 100, "Output seems too short" + assert "##" in content, "Output lacks markdown structure" diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml new file mode 100644 index 00000000..e1ce79a6 --- /dev/null +++ b/tests/fixtures/jobs/fruits/job.yml @@ -0,0 +1,40 @@ +name: fruits +version: "1.0.0" +summary: "Identify and classify fruits from a mixed list of items" +description: | + A simple, deterministic job for CI testing of the DeepWork framework. + + This job takes a list of mixed items (fruits and non-fruits) and: + 1. Identifies which items are fruits + 2. Classifies those fruits into categories (citrus, berries, tropical, etc.) + + This workflow is designed to produce predictable, verifiable outputs + making it ideal for automated testing. 
+ +changelog: + - version: "1.0.0" + changes: "Initial version for CI testing" + +steps: + - id: identify + name: "Identify Fruits" + description: "Filter a list of items to identify only the fruits" + instructions_file: steps/identify.md + inputs: + - name: raw_items + description: "Comma-separated list of items to filter (e.g., 'apple, car, banana, chair')" + outputs: + - identified_fruits.md + dependencies: [] + + - id: classify + name: "Classify Fruits" + description: "Organize identified fruits into categories" + instructions_file: steps/classify.md + inputs: + - file: identified_fruits.md + from_step: identify + outputs: + - classified_fruits.md + dependencies: + - identify diff --git a/tests/fixtures/jobs/fruits/steps/classify.md b/tests/fixtures/jobs/fruits/steps/classify.md new file mode 100644 index 00000000..dbd96c98 --- /dev/null +++ b/tests/fixtures/jobs/fruits/steps/classify.md @@ -0,0 +1,83 @@ +# Classify Fruits + +## Objective + +Organize the identified fruits into categories based on their type. + +## Task + +Read the `identified_fruits.md` file from the previous step and categorize each fruit. + +### Fruit Categories + +Use these standard categories: + +1. **Citrus** - orange, lemon, lime, grapefruit, tangerine, mandarin, clementine +2. **Berries** - strawberry, blueberry, raspberry, blackberry, cranberry, mulberry +3. **Tropical** - banana, mango, pineapple, papaya, coconut, kiwi, passion fruit +4. **Stone Fruits** - peach, plum, cherry, apricot, nectarine, lychee +5. **Pome Fruits** - apple, pear, quince +6. **Melons** - watermelon, cantaloupe, honeydew, melon +7. **Grapes** - grape, raisin + +If a fruit doesn't fit any category, list it under **Other**. 
+ +## Output Format + +Create `classified_fruits.md` with the following format: + +```markdown +# Classified Fruits + +## [Category Name] +- [fruit1] +- [fruit2] + +## [Another Category] +- [fruit3] + +--- + +## Summary + +| Category | Count | +|----------|-------| +| [category1] | X | +| [category2] | Y | +| **Total** | **Z** | +``` + +## Example + +If `identified_fruits.md` contains: apple, banana, orange + +Output should be: +```markdown +# Classified Fruits + +## Citrus +- orange + +## Tropical +- banana + +## Pome Fruits +- apple + +--- + +## Summary + +| Category | Count | +|----------|-------| +| Citrus | 1 | +| Tropical | 1 | +| Pome Fruits | 1 | +| **Total** | **3** | +``` + +## Notes + +- Only include categories that have at least one fruit +- Sort fruits alphabetically within each category +- Ensure the summary table matches the categorized fruits diff --git a/tests/fixtures/jobs/fruits/steps/identify.md b/tests/fixtures/jobs/fruits/steps/identify.md new file mode 100644 index 00000000..20acddb9 --- /dev/null +++ b/tests/fixtures/jobs/fruits/steps/identify.md @@ -0,0 +1,64 @@ +# Identify Fruits + +## Objective + +Filter the provided list of items to identify only the fruits. + +## Task + +Given the input `{{raw_items}}`, create a markdown file listing only the items that are fruits. + +### Common Fruits Reference + +The following are considered fruits: +- **Citrus**: orange, lemon, lime, grapefruit, tangerine, mandarin +- **Berries**: strawberry, blueberry, raspberry, blackberry, cranberry +- **Tropical**: banana, mango, pineapple, papaya, coconut, kiwi +- **Stone fruits**: peach, plum, cherry, apricot, nectarine +- **Pome fruits**: apple, pear, quince +- **Melons**: watermelon, cantaloupe, honeydew +- **Grapes**: grape, raisin + +### Instructions + +1. Parse the comma-separated list of items +2. For each item, determine if it is a fruit +3. 
Create a list of only the fruits found + +## Output Format + +Create `identified_fruits.md` with the following format: + +```markdown +# Identified Fruits + +The following fruits were identified from the input list: + +- [fruit1] +- [fruit2] +- [fruit3] +... + +## Summary + +Found X fruits from Y total items. +``` + +## Example + +If input is: `apple, car, banana, chair, orange, table` + +Output should be: +```markdown +# Identified Fruits + +The following fruits were identified from the input list: + +- apple +- banana +- orange + +## Summary + +Found 3 fruits from 6 total items. +``` diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py new file mode 100644 index 00000000..168c94d4 --- /dev/null +++ b/tests/integration/test_fruits_workflow.py @@ -0,0 +1,189 @@ +"""Integration tests for the fruits CI test workflow. + +This module tests the fruits job - a simple, deterministic workflow +designed for automated CI testing of the DeepWork framework. 
+""" + +from pathlib import Path + +from deepwork.core.adapters import ClaudeAdapter +from deepwork.core.generator import CommandGenerator +from deepwork.core.parser import parse_job_definition + + +class TestFruitsWorkflow: + """Integration tests for the fruits CI test workflow.""" + + def test_fruits_job_parses_correctly(self, fixtures_dir: Path) -> None: + """Test that the fruits job definition parses correctly.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + assert job.name == "fruits" + assert job.version == "1.0.0" + assert len(job.steps) == 2 + + # Verify step IDs + step_ids = [step.id for step in job.steps] + assert step_ids == ["identify", "classify"] + + def test_fruits_identify_step_structure(self, fixtures_dir: Path) -> None: + """Test the identify step has correct structure.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + identify_step = job.steps[0] + assert identify_step.id == "identify" + assert identify_step.name == "Identify Fruits" + + # Has user input + assert len(identify_step.inputs) == 1 + assert identify_step.inputs[0].is_user_input() + assert identify_step.inputs[0].name == "raw_items" + + # Has output + assert identify_step.outputs == ["identified_fruits.md"] + + # No dependencies (first step) + assert identify_step.dependencies == [] + + def test_fruits_classify_step_structure(self, fixtures_dir: Path) -> None: + """Test the classify step has correct structure.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + classify_step = job.steps[1] + assert classify_step.id == "classify" + assert classify_step.name == "Classify Fruits" + + # Has file input from previous step + assert len(classify_step.inputs) == 1 + assert classify_step.inputs[0].is_file_input() + assert classify_step.inputs[0].file == "identified_fruits.md" + assert classify_step.inputs[0].from_step == "identify" + + # Has output + assert 
classify_step.outputs == ["classified_fruits.md"] + + # Depends on identify step + assert classify_step.dependencies == ["identify"] + + def test_fruits_command_generation(self, fixtures_dir: Path, temp_dir: Path) -> None: + """Test that fruits job generates valid Claude commands.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + commands_dir = temp_dir / ".claude" + commands_dir.mkdir() + + command_paths = generator.generate_all_commands(job, adapter, commands_dir) + + assert len(command_paths) == 2 + + # Verify command files exist + identify_cmd = commands_dir / "commands" / "fruits.identify.md" + classify_cmd = commands_dir / "commands" / "fruits.classify.md" + assert identify_cmd.exists() + assert classify_cmd.exists() + + def test_fruits_identify_command_content(self, fixtures_dir: Path, temp_dir: Path) -> None: + """Test the identify command has correct content.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + commands_dir = temp_dir / ".claude" + commands_dir.mkdir() + + generator.generate_all_commands(job, adapter, commands_dir) + + identify_cmd = commands_dir / "commands" / "fruits.identify.md" + content = identify_cmd.read_text() + + # Check header + assert "# fruits.identify" in content + + # Check step info + assert "Step 1 of 2" in content + + # Check user input is mentioned + assert "raw_items" in content + + # Check output is mentioned + assert "identified_fruits.md" in content + + # Check next step is suggested + assert "/fruits.classify" in content + + def test_fruits_classify_command_content(self, fixtures_dir: Path, temp_dir: Path) -> None: + """Test the classify command has correct content.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + generator = CommandGenerator() + adapter = ClaudeAdapter() + 
commands_dir = temp_dir / ".claude" + commands_dir.mkdir() + + generator.generate_all_commands(job, adapter, commands_dir) + + classify_cmd = commands_dir / "commands" / "fruits.classify.md" + content = classify_cmd.read_text() + + # Check header + assert "# fruits.classify" in content + + # Check step info + assert "Step 2 of 2" in content + + # Check file input is mentioned + assert "identified_fruits.md" in content + assert "from step `identify`" in content + + # Check output is mentioned + assert "classified_fruits.md" in content + + # Check workflow complete (last step) + assert "Workflow Complete" in content + + def test_fruits_dependency_validation(self, fixtures_dir: Path) -> None: + """Test that dependency validation passes for fruits job.""" + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + # This should not raise - dependencies are valid + job.validate_dependencies() + + def test_fruits_job_is_deterministic_design(self, fixtures_dir: Path) -> None: + """Verify the fruits job is designed for deterministic testing. + + This test documents the design properties that make this job + suitable for CI testing. + """ + job_dir = fixtures_dir / "jobs" / "fruits" + job = parse_job_definition(job_dir) + + # Job has clear, simple structure + assert len(job.steps) == 2 + + # Steps form a linear dependency chain + assert job.steps[0].dependencies == [] + assert job.steps[1].dependencies == ["identify"] + + # First step takes user input + identify_step = job.steps[0] + assert len(identify_step.inputs) == 1 + assert identify_step.inputs[0].is_user_input() + + # Second step uses output from first step + classify_step = job.steps[1] + assert len(classify_step.inputs) == 1 + assert classify_step.inputs[0].is_file_input() + assert classify_step.inputs[0].from_step == "identify" + + # Outputs are well-defined markdown files + assert identify_step.outputs == ["identified_fruits.md"] + assert classify_step.outputs == ["classified_fruits.md"]