From 1e90375ed9b766058b4c3aff198303acf6b6337c Mon Sep 17 00:00:00 2001 From: David Ahmann Date: Wed, 18 Feb 2026 14:26:47 -0500 Subject: [PATCH] test: add tier-11 scenarios and CI validation --- .github/workflows/ci.yml | 4 + .github/workflows/pr-fast.yml | 3 + CODEOWNERS | 5 + Makefile | 6 +- go.mod | 3 +- go.sum | 5 +- internal/scenarios/fixtures.go | 35 ++ internal/scenarios/scenario_test.go | 447 ++++++++++++++++++ internal/scenarios/validate_test.go | 135 ++++++ scenarios/CHANGELOG.md | 7 + scenarios/README.md | 27 ++ scenarios/gait/README.md | 10 + .../gait/approval-expiry-1s-past/README.md | 3 + .../approval-token.json | 21 + .../approval_public.key | 1 + .../approval-expiry-1s-past/expected.yaml | 4 + .../gait/approval-expiry-1s-past/intent.json | 1 + .../gait/approval-expiry-1s-past/policy.yaml | 1 + scenarios/gait/approval-token-valid/README.md | 3 + .../approval-token-valid/approval-token.json | 21 + .../approval-token-valid/approval_public.key | 1 + .../gait/approval-token-valid/expected.yaml | 4 + .../gait/approval-token-valid/intent.json | 1 + .../gait/approval-token-valid/policy.yaml | 1 + .../gait/concurrent-evaluation-10/README.md | 3 + .../concurrent-evaluation-10/expected.yaml | 3 + .../gait/concurrent-evaluation-10/flags.yaml | 1 + .../gait/concurrent-evaluation-10/intent.json | 1 + .../gait/concurrent-evaluation-10/policy.yaml | 1 + .../gait/delegation-chain-depth-3/README.md | 3 + .../delegation-token-1.json | 20 + .../delegation-token-2.json | 20 + .../delegation-token-3.json | 20 + .../delegation_public.key | 1 + .../delegation-chain-depth-3/expected.yaml | 5 + .../gait/delegation-chain-depth-3/flags.yaml | 3 + .../gait/delegation-chain-depth-3/intent.json | 1 + .../gait/delegation-chain-depth-3/policy.yaml | 11 + .../gait/dry-run-no-side-effects/README.md | 3 + .../dry-run-no-side-effects/expected.yaml | 5 + .../gait/dry-run-no-side-effects/flags.yaml | 1 + .../gait/dry-run-no-side-effects/intent.json | 1 + 
.../gait/dry-run-no-side-effects/policy.yaml | 1 + .../gait/pack-integrity-round-trip/README.md | 3 + .../pack-integrity-round-trip/expected.yaml | 3 + .../gait/policy-allow-safe-tools/README.md | 3 + .../expected-verdicts.jsonl | 3 + .../policy-allow-safe-tools/intents.jsonl | 3 + .../gait/policy-allow-safe-tools/policy.yaml | 6 + .../gait/policy-block-destructive/README.md | 3 + .../expected-verdicts.jsonl | 5 + .../policy-block-destructive/intents.jsonl | 5 + .../gait/policy-block-destructive/policy.yaml | 10 + scripts/run_scenarios.sh | 53 +++ scripts/validate_scenarios.sh | 36 ++ 55 files changed, 982 insertions(+), 4 deletions(-) create mode 100644 internal/scenarios/fixtures.go create mode 100644 internal/scenarios/scenario_test.go create mode 100644 internal/scenarios/validate_test.go create mode 100644 scenarios/CHANGELOG.md create mode 100644 scenarios/README.md create mode 100644 scenarios/gait/README.md create mode 100644 scenarios/gait/approval-expiry-1s-past/README.md create mode 100644 scenarios/gait/approval-expiry-1s-past/approval-token.json create mode 100644 scenarios/gait/approval-expiry-1s-past/approval_public.key create mode 100644 scenarios/gait/approval-expiry-1s-past/expected.yaml create mode 100644 scenarios/gait/approval-expiry-1s-past/intent.json create mode 100644 scenarios/gait/approval-expiry-1s-past/policy.yaml create mode 100644 scenarios/gait/approval-token-valid/README.md create mode 100644 scenarios/gait/approval-token-valid/approval-token.json create mode 100644 scenarios/gait/approval-token-valid/approval_public.key create mode 100644 scenarios/gait/approval-token-valid/expected.yaml create mode 100644 scenarios/gait/approval-token-valid/intent.json create mode 100644 scenarios/gait/approval-token-valid/policy.yaml create mode 100644 scenarios/gait/concurrent-evaluation-10/README.md create mode 100644 scenarios/gait/concurrent-evaluation-10/expected.yaml create mode 100644 scenarios/gait/concurrent-evaluation-10/flags.yaml 
create mode 100644 scenarios/gait/concurrent-evaluation-10/intent.json create mode 100644 scenarios/gait/concurrent-evaluation-10/policy.yaml create mode 100644 scenarios/gait/delegation-chain-depth-3/README.md create mode 100644 scenarios/gait/delegation-chain-depth-3/delegation-token-1.json create mode 100644 scenarios/gait/delegation-chain-depth-3/delegation-token-2.json create mode 100644 scenarios/gait/delegation-chain-depth-3/delegation-token-3.json create mode 100644 scenarios/gait/delegation-chain-depth-3/delegation_public.key create mode 100644 scenarios/gait/delegation-chain-depth-3/expected.yaml create mode 100644 scenarios/gait/delegation-chain-depth-3/flags.yaml create mode 100644 scenarios/gait/delegation-chain-depth-3/intent.json create mode 100644 scenarios/gait/delegation-chain-depth-3/policy.yaml create mode 100644 scenarios/gait/dry-run-no-side-effects/README.md create mode 100644 scenarios/gait/dry-run-no-side-effects/expected.yaml create mode 100644 scenarios/gait/dry-run-no-side-effects/flags.yaml create mode 100644 scenarios/gait/dry-run-no-side-effects/intent.json create mode 100644 scenarios/gait/dry-run-no-side-effects/policy.yaml create mode 100644 scenarios/gait/pack-integrity-round-trip/README.md create mode 100644 scenarios/gait/pack-integrity-round-trip/expected.yaml create mode 100644 scenarios/gait/policy-allow-safe-tools/README.md create mode 100644 scenarios/gait/policy-allow-safe-tools/expected-verdicts.jsonl create mode 100644 scenarios/gait/policy-allow-safe-tools/intents.jsonl create mode 100644 scenarios/gait/policy-allow-safe-tools/policy.yaml create mode 100644 scenarios/gait/policy-block-destructive/README.md create mode 100644 scenarios/gait/policy-block-destructive/expected-verdicts.jsonl create mode 100644 scenarios/gait/policy-block-destructive/intents.jsonl create mode 100644 scenarios/gait/policy-block-destructive/policy.yaml create mode 100755 scripts/run_scenarios.sh create mode 100755 scripts/validate_scenarios.sh 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c44530..a3b290b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -207,6 +207,10 @@ jobs: run: | cd sdk/python python -m uv run --extra dev pytest --cov=gait --cov-report=term-missing --cov-fail-under=85 + - name: Tier 11 scenario suite + shell: bash + run: | + make test-scenarios e2e: needs: changes diff --git a/.github/workflows/pr-fast.yml b/.github/workflows/pr-fast.yml index 10d4e63..77874b9 100644 --- a/.github/workflows/pr-fast.yml +++ b/.github/workflows/pr-fast.yml @@ -54,6 +54,9 @@ jobs: PYTHONPATH: . run: | make test-fast + - name: Validate scenario fixtures + run: | + bash scripts/validate_scenarios.sh windows-fast: name: pr-fast-windows diff --git a/CODEOWNERS b/CODEOWNERS index d4c83ff..5ad7de3 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -5,4 +5,9 @@ /cmd/gait/** @davidahmann /core/** @davidahmann /schemas/** @davidahmann + +# Scenario fixtures are contract artifacts and require explicit owner review. +# Changes under /scenarios/ should be reviewed for expected behavior drift. 
+/scenarios/ @davidahmann + /.github/workflows/** @davidahmann diff --git a/Makefile b/Makefile index 5b37e61..6834309 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ BENCH_REGEX := Benchmark(EvaluatePolicyTypical|VerifyZipTypical|DiffRunpacksTypi BENCH_OUTPUT ?= perf/bench_output.txt BENCH_BASELINE ?= perf/bench_baseline.json -.PHONY: fmt lint lint-fast codeql test test-fast prepush prepush-full github-guardrails github-guardrails-strict test-hardening test-hardening-acceptance test-chaos test-e2e test-acceptance test-v1-6-acceptance test-v1-7-acceptance test-v1-8-acceptance test-v2-3-acceptance test-v2-4-acceptance test-v2-5-acceptance test-v2-6-acceptance test-voice-acceptance test-context-conformance test-context-chaos test-packspec-tck test-ui-acceptance test-ui-unit test-ui-e2e-smoke test-ui-perf test-adoption test-adapter-parity test-ecosystem-automation test-release-smoke test-install test-install-path-versions test-contracts test-intent-receipt-conformance test-ci-regress-template test-ci-portability-templates test-live-connectors test-skill-supply-chain test-runtime-slo test-ent-consumer-contract test-uat-local test-openclaw-skill-install test-beads-bridge test-docs-storyline test-docs-consistency test-demo-recording openclaw-skill-install build bench bench-check bench-budgets context-budgets skills-validate ecosystem-validate ecosystem-release-notes demo-90s demo-hero-gif homebrew-formula wiki-publish tool-allowlist-policy ui-build ui-sync ui-deps-check +.PHONY: fmt lint lint-fast codeql test test-fast test-scenarios prepush prepush-full github-guardrails github-guardrails-strict test-hardening test-hardening-acceptance test-chaos test-e2e test-acceptance test-v1-6-acceptance test-v1-7-acceptance test-v1-8-acceptance test-v2-3-acceptance test-v2-4-acceptance test-v2-5-acceptance test-v2-6-acceptance test-voice-acceptance test-context-conformance test-context-chaos test-packspec-tck test-ui-acceptance test-ui-unit test-ui-e2e-smoke test-ui-perf 
test-adoption test-adapter-parity test-ecosystem-automation test-release-smoke test-install test-install-path-versions test-contracts test-intent-receipt-conformance test-ci-regress-template test-ci-portability-templates test-live-connectors test-skill-supply-chain test-runtime-slo test-ent-consumer-contract test-uat-local test-openclaw-skill-install test-beads-bridge test-docs-storyline test-docs-consistency test-demo-recording openclaw-skill-install build bench bench-check bench-budgets context-budgets skills-validate ecosystem-validate ecosystem-release-notes demo-90s demo-hero-gif homebrew-formula wiki-publish tool-allowlist-policy ui-build ui-sync ui-deps-check .PHONY: hooks .PHONY: docs-site-install docs-site-build docs-site-lint docs-site-check @@ -66,6 +66,9 @@ test-fast: $(GO) test ./... (cd $(SDK_DIR) && PYTHONPATH=. uv run --python $(UV_PY) --extra dev pytest) +test-scenarios: + $(GO) test ./internal/scenarios -count=1 -tags=scenario -v + prepush: $(MAKE) lint-fast $(MAKE) test-fast @@ -73,6 +76,7 @@ prepush: prepush-full: $(MAKE) lint $(MAKE) test + $(MAKE) test-scenarios $(MAKE) codeql github-guardrails: diff --git a/go.mod b/go.mod index 991afba..6c16e7e 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,9 @@ module github.com/Clyra-AI/gait go 1.25.7 require ( - github.com/Clyra-AI/proof v0.3.0 + github.com/Clyra-AI/proof v0.4.0 github.com/goccy/go-yaml v1.19.2 + gopkg.in/yaml.v3 v3.0.1 ) require ( diff --git a/go.sum b/go.sum index 4625fd2..6812cf8 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/Clyra-AI/proof v0.3.0 h1:3AjRK3YqGuzWZCzhu6isXKr/EGNoOvo/kkkM4JFxNqI= -github.com/Clyra-AI/proof v0.3.0/go.mod h1:EDff6buidj222E+EYyqQXXj1rtPgSFlYOxl2JFfWKFs= +github.com/Clyra-AI/proof v0.4.0 h1:tcto9gVZeIA96eCQunnY5LICqg1bl+IkMR4i1k9Eg3o= +github.com/Clyra-AI/proof v0.4.0/go.mod h1:EDff6buidj222E+EYyqQXXj1rtPgSFlYOxl2JFfWKFs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -15,6 +15,7 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
 github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/internal/scenarios/fixtures.go b/internal/scenarios/fixtures.go
new file mode 100644
index 0000000..4e38936
--- /dev/null
+++ b/internal/scenarios/fixtures.go
@@ -0,0 +1,41 @@
+package scenarios
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// scenarioRootRelativePath is the scenario fixture directory, relative to the repository root.
+const scenarioRootRelativePath = "scenarios/gait"
+
+// requiredScenarioMinimumFiles maps each known scenario directory name to the
+// files that must exist in it. It is also the authoritative list of scenarios.
+var requiredScenarioMinimumFiles = map[string][]string{
+	"policy-block-destructive":  {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"},
+	"policy-allow-safe-tools":   {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"},
+	"dry-run-no-side-effects":   {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
+	"concurrent-evaluation-10":  {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
+	"pack-integrity-round-trip": {"README.md", "expected.yaml"},
+	"delegation-chain-depth-3":  {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "delegation-token-1.json", "delegation-token-2.json", "delegation-token-3.json", "delegation_public.key"},
+	"approval-expiry-1s-past":   {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"},
+	"approval-token-valid":      {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"},
+}
+
+// findRepoRoot walks upward from startDir and returns the first directory that
+// contains a go.mod entry which is not itself a directory, or an error once
+// the filesystem root is reached without finding one.
+func findRepoRoot(startDir string) (string, error) {
+	current := startDir
+	for {
+		candidate := filepath.Join(current, "go.mod")
+		if info, err := os.Stat(candidate); err == nil && !info.IsDir() {
+			return current, nil
+		}
+		parent := filepath.Dir(current)
+		if parent == current {
+			return "", fmt.Errorf("unable to locate repository root from %s", startDir)
+		}
+		current = parent
+	}
+}
diff --git a/internal/scenarios/scenario_test.go b/internal/scenarios/scenario_test.go
new file mode 100644
index 0000000..b74a4cd
--- /dev/null
+++ b/internal/scenarios/scenario_test.go
@@ -0,0 +1,496 @@
+//go:build scenario
+
+package scenarios
+
+import (
+	"bufio"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+	"testing"
+
+	"gopkg.in/yaml.v3"
+)
+
+// scenarioFlags holds the execution options decoded from a scenario's flags.yaml.
+type scenarioFlags struct {
+	Simulate             bool     `yaml:"simulate"`
+	Concurrency          int      `yaml:"concurrency"`
+	DelegationChainFiles []string `yaml:"delegation_chain_files"`
+}
+
+// expectedYAML holds the expected outcome decoded from a scenario's expected.yaml.
+type expectedYAML struct {
+	ExitCode               int      `yaml:"exit_code"`
+	Verdict                string   `yaml:"verdict"`
+	SimulateMode           bool     `yaml:"simulate_mode"`
+	SuccessfulRuns         int      `yaml:"successful_runs"`
+	VerifySignatureStatus  string   `yaml:"verify_signature_status"`
+	SourceRef              string   `yaml:"source_ref"`
+	ValidDelegations       int      `yaml:"valid_delegations"`
+	ReasonCodes            []string `yaml:"reason_codes"`
+	ReasonCodesMustInclude []string `yaml:"reason_codes_must_include"`
+}
+
+// expectedVerdictLine is one record of expected-verdicts.jsonl.
+type expectedVerdictLine struct {
+	Index    int    `json:"index"`
+	ToolName string `json:"tool_name"`
+	Verdict  string `json:"verdict"`
+	ExitCode int    `json:"exit_code"`
+}
+
+// gateEvalOutput is the subset of `gait gate eval --json` output that scenarios assert on.
+type gateEvalOutput struct {
+	Verdict          string   `json:"verdict"`
+	ReasonCodes      []string `json:"reason_codes"`
+	SimulateMode     bool     `json:"simulate_mode"`
+	ValidDelegations int      `json:"valid_delegations"`
+}
+
+// packVerifyOutput is the subset of `gait pack verify --json` output that scenarios assert on.
+type packVerifyOutput struct {
+	SourceRef string `json:"source_ref"`
+	Verify    struct {
+		SignatureStatus string `json:"signature_status"`
+		SourceRef       string `json:"source_ref"`
+	} `json:"verify"`
+}
+
+// TestTier11Scenarios builds (or locates) the gait binary once and runs every
+// scenario listed in requiredScenarioMinimumFiles as a subtest, in sorted
+// order so the run order is deterministic.
+func TestTier11Scenarios(t *testing.T) {
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("get cwd: %v", err)
+	}
+	repoRoot, err := findRepoRoot(cwd)
+	if err != nil {
+		t.Fatalf("find repo root: %v", err)
+	}
+	scenarioRoot := filepath.Join(repoRoot, scenarioRootRelativePath)
+	binaryPath := buildGaitBinary(t, repoRoot)
+
+	scenarioNames := make([]string, 0, len(requiredScenarioMinimumFiles))
+	for name := range requiredScenarioMinimumFiles {
+		scenarioNames = append(scenarioNames, name)
+	}
+	sort.Strings(scenarioNames)
+
+	for _, name := range scenarioNames {
+		t.Run(name, func(t *testing.T) {
+			scenarioPath := filepath.Join(scenarioRoot, name)
+			runScenario(t, repoRoot, binaryPath, name, scenarioPath)
+		})
+	}
+}
+
+// runScenario dispatches a scenario directory to the runner that understands
+// its fixture layout and fails the test for names without a runner.
+func runScenario(t *testing.T, repoRoot string, binaryPath string, name string, scenarioPath string) {
+	switch name {
+	case "policy-block-destructive", "policy-allow-safe-tools":
+		runPolicyVerdictScenario(t, repoRoot, binaryPath, scenarioPath)
+	case "dry-run-no-side-effects":
+		runDryRunScenario(t, repoRoot, binaryPath, scenarioPath)
+	case "concurrent-evaluation-10":
+		runConcurrentScenario(t, repoRoot, binaryPath, scenarioPath)
+	case "pack-integrity-round-trip":
+		runPackScenario(t, repoRoot, binaryPath, scenarioPath)
+	case "delegation-chain-depth-3":
+		runDelegationScenario(t, repoRoot, binaryPath, scenarioPath)
+	case "approval-expiry-1s-past", "approval-token-valid":
+		runApprovalScenario(t, repoRoot, binaryPath, scenarioPath)
+	default:
+		t.Fatalf("unsupported scenario: %s", name)
+	}
+}
+
+// runPolicyVerdictScenario evaluates each line of intents.jsonl against
+// policy.yaml and compares exit code and verdict with the matching line of
+// expected-verdicts.jsonl.
+func runPolicyVerdictScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedVerdicts(t, filepath.Join(scenarioPath, "expected-verdicts.jsonl"))
+	intents := readJSONLLines(t, filepath.Join(scenarioPath, "intents.jsonl"))
+	policyPath := filepath.Join(scenarioPath, "policy.yaml")
+
+	if len(expected) != len(intents) {
+		t.Fatalf("expected/intents length mismatch: expected=%d intents=%d", len(expected), len(intents))
+	}
+
+	workDir := t.TempDir()
+	for i := range expected {
+		intentPath := filepath.Join(workDir, fmt.Sprintf("intent_%02d.json", i+1))
+		if err := os.WriteFile(intentPath, []byte(intents[i]), 0o600); err != nil {
+			t.Fatalf("write intent fixture %s: %v", intentPath, err)
+		}
+		output, code := runCommand(t, workDir, binaryPath,
+			"gate", "eval",
+			"--policy", policyPath,
+			"--intent", intentPath,
+			"--json",
+		)
+		if code != expected[i].ExitCode {
+			t.Fatalf("unexpected exit code for index=%d tool=%s: got=%d want=%d output=%s", expected[i].Index, expected[i].ToolName, code, expected[i].ExitCode, output)
+		}
+		var got gateEvalOutput
+		if err := json.Unmarshal([]byte(output), &got); err != nil {
+			t.Fatalf("parse gate output for index=%d: %v output=%s", expected[i].Index, err, output)
+		}
+		if got.Verdict != expected[i].Verdict {
+			t.Fatalf("unexpected verdict for index=%d tool=%s: got=%s want=%s output=%s", expected[i].Index, expected[i].ToolName, got.Verdict, expected[i].Verdict, output)
+		}
+	}
+}
+
+// runDryRunScenario evaluates intent.json once, adding --simulate when
+// flags.yaml requests it, and checks exit code, verdict, simulate_mode and
+// the reason codes listed under reason_codes in expected.yaml.
+func runDryRunScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
+	flags := readScenarioFlags(t, filepath.Join(scenarioPath, "flags.yaml"))
+	intentPath := filepath.Join(scenarioPath, "intent.json")
+	policyPath := filepath.Join(scenarioPath, "policy.yaml")
+
+	args := []string{"gate", "eval", "--policy", policyPath, "--intent", intentPath, "--json"}
+	if flags.Simulate {
+		args = append(args, "--simulate")
+	}
+	output, code := runCommand(t, t.TempDir(), binaryPath, args...)
+	if code != expected.ExitCode {
+		t.Fatalf("unexpected exit code: got=%d want=%d output=%s", code, expected.ExitCode, output)
+	}
+	var got gateEvalOutput
+	if err := json.Unmarshal([]byte(output), &got); err != nil {
+		t.Fatalf("parse gate output: %v output=%s", err, output)
+	}
+	if got.Verdict != expected.Verdict {
+		t.Fatalf("unexpected verdict: got=%s want=%s output=%s", got.Verdict, expected.Verdict, output)
+	}
+	if got.SimulateMode != expected.SimulateMode {
+		t.Fatalf("unexpected simulate_mode: got=%v want=%v output=%s", got.SimulateMode, expected.SimulateMode, output)
+	}
+	for _, required := range expected.ReasonCodes {
+		if !contains(got.ReasonCodes, required) {
+			t.Fatalf("missing required reason code %q in %v", required, got.ReasonCodes)
+		}
+	}
+}
+
+// runConcurrentScenario runs the same gate evaluation flags.Concurrency times
+// in parallel (at least once), each in its own working directory, and fails
+// if any run deviates from the expected exit code or verdict.
+func runConcurrentScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
+	flags := readScenarioFlags(t, filepath.Join(scenarioPath, "flags.yaml"))
+	if flags.Concurrency <= 0 {
+		flags.Concurrency = 1
+	}
+
+	intentPath := filepath.Join(scenarioPath, "intent.json")
+	policyPath := filepath.Join(scenarioPath, "policy.yaml")
+	baseWorkDir := t.TempDir()
+
+	var wg sync.WaitGroup
+	errCh := make(chan error, flags.Concurrency)
+	for i := 0; i < flags.Concurrency; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			workDir := filepath.Join(baseWorkDir, fmt.Sprintf("run_%02d", i+1))
+			if err := os.MkdirAll(workDir, 0o755); err != nil {
+				errCh <- fmt.Errorf("mkdir workdir: %w", err)
+				return
+			}
+			// t.Fatalf must only be called from the goroutine running the
+			// test, so workers use execGait and report through errCh rather
+			// than calling runCommand.
+			output, code, err := execGait(workDir, binaryPath,
+				"gate", "eval",
+				"--policy", policyPath,
+				"--intent", intentPath,
+				"--json",
+			)
+			if err != nil {
+				errCh <- fmt.Errorf("run=%d: %w", i+1, err)
+				return
+			}
+			if code != expected.ExitCode {
+				errCh <- fmt.Errorf("unexpected exit code run=%d got=%d want=%d output=%s", i+1, code, expected.ExitCode, output)
+				return
+			}
+			var got gateEvalOutput
+			if err := json.Unmarshal([]byte(output), &got); err != nil {
+				errCh <- fmt.Errorf("parse gate output run=%d: %w output=%s", i+1, err, output)
+				return
+			}
+			if got.Verdict != expected.Verdict {
+				errCh <- fmt.Errorf("unexpected verdict run=%d got=%s want=%s", i+1, got.Verdict, expected.Verdict)
+				return
+			}
+		}()
+	}
+	wg.Wait()
+	close(errCh)
+
+	failures := []string{}
+	for err := range errCh {
+		if err != nil {
+			failures = append(failures, err.Error())
+		}
+	}
+	if len(failures) > 0 {
+		t.Fatalf("concurrent evaluation failures: %v", failures)
+	}
+	if expected.SuccessfulRuns > 0 && flags.Concurrency != expected.SuccessfulRuns {
+		t.Fatalf("unexpected concurrency run count: got=%d want=%d", flags.Concurrency, expected.SuccessfulRuns)
+	}
+}
+
+// execGait runs the gait binary in workDir and returns its trimmed combined
+// output and exit code. Unlike runCommand it never touches testing.T, so it
+// is safe to call from goroutines other than the one running the test; a
+// failure to run the process at all is returned as an error.
+func execGait(workDir string, binaryPath string, args ...string) (string, int, error) {
+	cmd := exec.Command(binaryPath, args...)
+	cmd.Dir = workDir
+	raw, err := cmd.CombinedOutput()
+	output := strings.TrimSpace(string(raw))
+	if err == nil {
+		return output, 0, nil
+	}
+	var exitErr *exec.ExitError
+	if errors.As(err, &exitErr) {
+		return output, exitErr.ExitCode(), nil
+	}
+	return output, -1, fmt.Errorf("run command %v: %w output=%s", args, err, output)
+}
+
+// runPackScenario runs `gait demo`, builds a pack of type run from run_demo,
+// verifies it, and checks the verify exit code plus the signature status and
+// source_ref when expected.yaml specifies them.
+func runPackScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
+	workDir := t.TempDir()
+
+	_, demoCode := runCommand(t, workDir, binaryPath, "demo", "--json")
+	if demoCode != 0 {
+		t.Fatalf("demo failed with exit code %d", demoCode)
+	}
+	packPath := filepath.Join(workDir, "scenario-pack.zip")
+	buildOutput, buildCode := runCommand(t, workDir, binaryPath,
+		"pack", "build",
+		"--type", "run",
+		"--from", "run_demo",
+		"--out", packPath,
+		"--json",
+	)
+	if buildCode != 0 {
+		t.Fatalf("pack build failed: code=%d output=%s", buildCode, buildOutput)
+	}
+	verifyOutput, verifyCode := runCommand(t, workDir, binaryPath,
+		"pack", "verify",
+		packPath,
+		"--json",
+	)
+	if verifyCode != expected.ExitCode {
+		t.Fatalf("pack verify exit mismatch: got=%d want=%d output=%s", verifyCode, expected.ExitCode, verifyOutput)
+	}
+
+	var got packVerifyOutput
+	if err := json.Unmarshal([]byte(verifyOutput), &got); err != nil {
+		t.Fatalf("parse pack verify output: %v output=%s", err, verifyOutput)
+	}
+	if expected.VerifySignatureStatus != "" && got.Verify.SignatureStatus != expected.VerifySignatureStatus {
+		t.Fatalf("unexpected signature status: got=%s want=%s", got.Verify.SignatureStatus, expected.VerifySignatureStatus)
+	}
+	sourceRef := got.SourceRef
+	if sourceRef == "" {
+		sourceRef = got.Verify.SourceRef
+	}
+	if expected.SourceRef != "" && sourceRef != expected.SourceRef {
+		t.Fatalf("unexpected source_ref: got=%s want=%s", sourceRef, expected.SourceRef)
+	}
+}
+
+// runDelegationScenario evaluates intent.json with delegation-token-1.json, the
+// token chain listed in flags.yaml and delegation_public.key, then checks exit
+// code, verdict, valid_delegations (when set) and required reason codes.
+func runDelegationScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
+	flags := readScenarioFlags(t, filepath.Join(scenarioPath, "flags.yaml"))
+
+	chain := make([]string, 0, len(flags.DelegationChainFiles))
+	for _, rel := range flags.DelegationChainFiles {
+		chain = append(chain, filepath.Join(scenarioPath, rel))
+	}
+	output, code := runCommand(t, t.TempDir(), binaryPath,
+		"gate", "eval",
+		"--policy", filepath.Join(scenarioPath, "policy.yaml"),
+		"--intent", filepath.Join(scenarioPath, "intent.json"),
+		"--delegation-token", filepath.Join(scenarioPath, "delegation-token-1.json"),
+		"--delegation-token-chain", strings.Join(chain, ","),
+		"--delegation-public-key", filepath.Join(scenarioPath, "delegation_public.key"),
+		"--json",
+	)
+	if code != expected.ExitCode {
+		t.Fatalf("delegation scenario exit mismatch: got=%d want=%d output=%s", code, expected.ExitCode, output)
+	}
+	var got gateEvalOutput
+	if err := json.Unmarshal([]byte(output), &got); err != nil {
+		t.Fatalf("parse delegation output: %v output=%s", err, output)
+	}
+	if got.Verdict != expected.Verdict {
+		t.Fatalf("unexpected delegation verdict: got=%s want=%s output=%s", got.Verdict, expected.Verdict, output)
+	}
+	if expected.ValidDelegations > 0 && got.ValidDelegations != expected.ValidDelegations {
+		t.Fatalf("unexpected valid_delegations: got=%d want=%d", got.ValidDelegations, expected.ValidDelegations)
+	}
+	for _, required := range expected.ReasonCodesMustInclude {
+		if !contains(got.ReasonCodes, required) {
+			t.Fatalf("missing required reason code %q in %v", required, got.ReasonCodes)
+		}
+	}
+}
+
+func runApprovalScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
+	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
+	output, code := runCommand(t, t.TempDir(), binaryPath,
+		"gate", "eval",
+		"--policy", filepath.Join(scenarioPath, "policy.yaml"),
+		"--intent", filepath.Join(scenarioPath, "intent.json"),
+		"--approval-token", filepath.Join(scenarioPath, "approval-token.json"),
+		"--approval-public-key", filepath.Join(scenarioPath, "approval_public.key"),
+		"--json",
+	)
+	if code != expected.ExitCode {
+		t.Fatalf("approval scenario exit mismatch: got=%d want=%d output=%s", code, expected.ExitCode, output)
+	}
+	var got gateEvalOutput
+	if err := json.Unmarshal([]byte(output), &got); err != nil {
+		t.Fatalf("parse approval output: %v output=%s", err, output)
+	}
+	if got.Verdict != expected.Verdict {
+		t.Fatalf("unexpected approval verdict: got=%s want=%s output=%s", got.Verdict, expected.Verdict, output)
+	}
+	for _, required := range expected.ReasonCodesMustInclude {
+		if !contains(got.ReasonCodes, required) {
+			t.Fatalf("missing required reason code %q in %v", required, got.ReasonCodes)
+		}
+	}
+}
+
+func buildGaitBinary(t *testing.T, repoRoot string) string {
+	t.Helper()
+	if prebuilt := strings.TrimSpace(os.Getenv("GAIT_SCENARIO_BIN")); prebuilt != "" {
+		if info, err := os.Stat(prebuilt); err == nil && !info.IsDir() {
+			return prebuilt
+		}
+		t.Fatalf("GAIT_SCENARIO_BIN does not point to a valid file: %s", prebuilt)
+	}
+	binaryPath := filepath.Join(t.TempDir(), "gait")
+	cmd := exec.Command("go", "build", "-o", binaryPath, "./cmd/gait")
+	cmd.Dir = repoRoot
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("build gait binary: %v output=%s", err, string(output))
+	}
+	return binaryPath
+}
+
+func runCommand(t *testing.T, workDir string, binaryPath string, args ...string) (string, int) {
+	t.Helper()
+	cmd := exec.Command(binaryPath, args...)
+	cmd.Dir = workDir
+	output, err := cmd.CombinedOutput()
+	if err == nil {
+		return strings.TrimSpace(string(output)), 0
+	}
+	var exitErr *exec.ExitError
+	if errors.As(err, &exitErr) {
+		return strings.TrimSpace(string(output)), exitErr.ExitCode()
+	}
+	t.Fatalf("run command %v: %v output=%s", args, err, string(output))
+	return "", -1
+}
+
+func readScenarioFlags(t *testing.T, path string) scenarioFlags {
+	t.Helper()
+	payload, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read flags file %s: %v", path, err)
+	}
+	var flags scenarioFlags
+	if err := yaml.Unmarshal(payload, &flags); err != nil {
+		t.Fatalf("parse flags file %s: %v", path, err)
+	}
+	return flags
+}
+
+func readExpectedYAML(t *testing.T, path string) expectedYAML {
+	t.Helper()
+	payload, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read expected yaml %s: %v", path, err)
+	}
+	var expected expectedYAML
+	if err := yaml.Unmarshal(payload, &expected); err != nil {
+		t.Fatalf("parse expected yaml %s: %v", path, err)
+	}
+	return expected
+}
+
+func readExpectedVerdicts(t *testing.T, path string) []expectedVerdictLine {
+	t.Helper()
+	lines := readJSONLLines(t, path)
+	out := make([]expectedVerdictLine, 0, len(lines))
+	for _, line := range lines {
+		var item expectedVerdictLine
+		if err := json.Unmarshal([]byte(line), &item); err != nil {
+			t.Fatalf("parse expected verdict line in %s: %v line=%s", path, err, line)
+		}
+		out = append(out, item)
+	}
+	return out
+}
+
+func readJSONLLines(t *testing.T, path string) []string {
+	t.Helper()
+	file, err := os.Open(path)
+	if err != nil {
+		t.Fatalf("open jsonl file %s: %v", path, err)
+	}
+	defer func() {
+		_ = file.Close()
+	}()
+
+	lines := []string{}
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" {
+			continue
+		}
+		lines = append(lines, line)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("scan jsonl file %s: %v", path, err)
+	}
+	return lines
+}
+
+func contains(items []string, target string) bool {
+	for _, item := range items {
+		if strings.TrimSpace(item) == target {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/scenarios/validate_test.go b/internal/scenarios/validate_test.go
new file mode 100644
index 0000000..1de19a2
--- /dev/null
+++ b/internal/scenarios/validate_test.go
@@ -0,0 +1,146 @@
+package scenarios
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"testing"
+
+	"gopkg.in/yaml.v3"
+)
+
+// TestValidateScenarioFixtures checks the fixture corpus without running the
+// gait binary: every directory under the scenario root must be listed in
+// requiredScenarioMinimumFiles and contain its required files, every
+// .yaml/.yml/.json/.jsonl file in it must parse, and no listed scenario may
+// be missing.
+func TestValidateScenarioFixtures(t *testing.T) {
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("get cwd: %v", err)
+	}
+	repoRoot, err := findRepoRoot(cwd)
+	if err != nil {
+		t.Fatalf("find repo root: %v", err)
+	}
+	scenarioRoot := filepath.Join(repoRoot, scenarioRootRelativePath)
+
+	entries, err := os.ReadDir(scenarioRoot)
+	if err != nil {
+		t.Fatalf("read scenario root: %v", err)
+	}
+
+	seenScenarios := map[string]struct{}{}
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+		scenarioName := entry.Name()
+		scenarioPath := filepath.Join(scenarioRoot, scenarioName)
+		seenScenarios[scenarioName] = struct{}{}
+
+		requiredFiles, known := requiredScenarioMinimumFiles[scenarioName]
+		if !known {
+			t.Fatalf("unexpected scenario directory: %s", scenarioName)
+		}
+		for _, required := range requiredFiles {
+			filePath := filepath.Join(scenarioPath, required)
+			info, statErr := os.Stat(filePath)
+			if statErr != nil {
+				t.Fatalf("missing required file %s: %v", filePath, statErr)
+			}
+			if info.IsDir() {
+				t.Fatalf("required file is a directory: %s", filePath)
+			}
+		}
+
+		walkErr := filepath.WalkDir(scenarioPath, func(path string, d os.DirEntry, walkErr error) error {
+			if walkErr != nil {
+				return walkErr
+			}
+			if d.IsDir() {
+				return nil
+			}
+			return validateFixtureFileSyntax(path)
+		})
+		if walkErr != nil {
+			t.Fatalf("validate fixture syntax for %s: %v", scenarioName, walkErr)
+		}
+	}
+
+	if len(seenScenarios) != len(requiredScenarioMinimumFiles) {
+		known := make([]string, 0, len(requiredScenarioMinimumFiles))
+		for name := range requiredScenarioMinimumFiles {
+			known = append(known, name)
+		}
+		sort.Strings(known)
+
+		seen := make([]string, 0, len(seenScenarios))
+		for name := range seenScenarios {
+			seen = append(seen, name)
+		}
+		sort.Strings(seen)
+		t.Fatalf("scenario count mismatch: expected=%d got=%d expected_names=%v got_names=%v", len(requiredScenarioMinimumFiles), len(seenScenarios), known, seen)
+	}
+
+	t.Logf("validated %d scenarios under %s", len(seenScenarios), scenarioRoot)
+}
+
+// validateFixtureFileSyntax parses the file at path according to its extension
+// (.yaml/.yml, .json or .jsonl) and returns the first error encountered; files
+// with any other extension are accepted unread. Parse errors are wrapped with
+// the path, and for JSONL the 1-based line number, so a failure identifies the
+// offending fixture.
+func validateFixtureFileSyntax(path string) error {
+	ext := strings.ToLower(filepath.Ext(path))
+	switch ext {
+	case ".yaml", ".yml":
+		payload, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		var parsed any
+		if err := yaml.Unmarshal(payload, &parsed); err != nil {
+			return fmt.Errorf("parse yaml %s: %w", path, err)
+		}
+	case ".json":
+		payload, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		var parsed any
+		if err := json.Unmarshal(payload, &parsed); err != nil {
+			return fmt.Errorf("parse json %s: %w", path, err)
+		}
+	case ".jsonl":
+		file, err := os.Open(path)
+		if err != nil {
+			return err
+		}
+		defer func() {
+			_ = file.Close()
+		}()
+
+		scanner := bufio.NewScanner(file)
+		line := 0
+		for scanner.Scan() {
+			line++
+			text := strings.TrimSpace(scanner.Text())
+			if text == "" {
+				continue
+			}
+			var parsed any
+			if err := json.Unmarshal([]byte(text), &parsed); err != nil {
+				return fmt.Errorf("parse jsonl %s line %d: %w", path, line, err)
+			}
+		}
+		if err := scanner.Err(); err != nil {
+			return fmt.Errorf("scan jsonl %s: %w", path, err)
+		}
+	}
+	return nil
+}
diff --git a/scenarios/CHANGELOG.md b/scenarios/CHANGELOG.md
new file mode 100644
index 0000000..43cc9dc
--- /dev/null
+++ b/scenarios/CHANGELOG.md
@@ -0,0 +1,7 @@
+# Scenario Changelog
+
+## 2026-02-18
+
+- Added Tier 11 `scenarios/gait/` fixture corpus with eight deterministic scenarios.
+- Added policy, dry-run, concurrency, pack integrity, delegation, and approval fixture coverage.
+- Added expected outcome artifacts for scenario runner and CI validation.
diff --git a/scenarios/README.md b/scenarios/README.md new file mode 100644 index 0000000..ac22631 --- /dev/null +++ b/scenarios/README.md @@ -0,0 +1,27 @@ +# Tier 11 Scenario Fixtures + +Scenario fixtures are human-reviewed behavior specifications for outside-in validation. +All fixtures are offline-first and deterministic. + +## Running + +- Validate fixture structure and syntax: `bash scripts/validate_scenarios.sh` +- Run all gait scenarios: `bash scripts/run_scenarios.sh gait` +- Run Go scenario harness directly: `go test ./internal/scenarios -count=1 -tags=scenario -v` + +## Authorship Rules + +- Changes under `scenarios/` are specification changes and require CODEOWNERS review. +- Expected outcome files (`expected.yaml`, `expected-verdicts.jsonl`) define contract behavior. +- Fixtures must be self-contained and must not require network access. + +## Scenario Set (gait) + +1. `policy-block-destructive` +2. `policy-allow-safe-tools` +3. `dry-run-no-side-effects` +4. `concurrent-evaluation-10` +5. `pack-integrity-round-trip` +6. `delegation-chain-depth-3` +7. `approval-expiry-1s-past` +8. `approval-token-valid` diff --git a/scenarios/gait/README.md b/scenarios/gait/README.md new file mode 100644 index 0000000..291d2dc --- /dev/null +++ b/scenarios/gait/README.md @@ -0,0 +1,10 @@ +# Gait Scenario Fixtures + +These scenarios validate Gate and Pack behavior from externally-authored fixtures. + +Each scenario directory contains: + +- `README.md` for intent and rationale +- input artifacts (`policy.yaml`, `intent.json`, `intents.jsonl`, token files, etc.) 
+- expected artifacts (`expected.yaml` or `expected-verdicts.jsonl`) +- optional `flags.yaml` for execution options diff --git a/scenarios/gait/approval-expiry-1s-past/README.md b/scenarios/gait/approval-expiry-1s-past/README.md new file mode 100644 index 0000000..32b44c9 --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/README.md @@ -0,0 +1,3 @@ +# approval-expiry-1s-past + +Ensures expired approval tokens fail closed and keep verdict at require_approval. diff --git a/scenarios/gait/approval-expiry-1s-past/approval-token.json b/scenarios/gait/approval-expiry-1s-past/approval-token.json new file mode 100644 index 0000000..d3fc14a --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/approval-token.json @@ -0,0 +1,21 @@ +{ + "schema_id": "gait.gate.approval_token", + "schema_version": "1.0.0", + "created_at": "2026-02-18T19:13:05.165818Z", + "producer_version": "0.0.0-dev", + "token_id": "dfa1d0984e22f984a9c0ca45", + "approver_identity": "human.reviewer", + "reason_code": "MANUAL_APPROVAL", + "intent_digest": "350b03d56a4427cdddccb78038cf7ce3eb47d497242749793ddc15d0c809a24b", + "policy_digest": "07a528a879e4f0e0cd5a0e2ca801924746b47dac624862207ef21d4ec266f8f9", + "scope": [ + "tool:tool.write" + ], + "expires_at": "2026-02-18T19:13:06.165818Z", + "signature": { + "alg": "ed25519", + "key_id": "4057fdde7cca116d12910f084ed93107c06bf44ed971c9c7ef72ea172776c4da", + "sig": "e0lnN37/bjusfb5lfIC/tin5NSbZL5zIn/bhOB5kmzQ8asCfVYejple6c86bfam+pWnPqycKAGTb63yfvw7MCQ==", + "signed_digest": "e30643b2f6fbb5d3929b08173f8f61f56c3a2c06b0e7ed3795b2e6ad191e317e" + } +} diff --git a/scenarios/gait/approval-expiry-1s-past/approval_public.key b/scenarios/gait/approval-expiry-1s-past/approval_public.key new file mode 100644 index 0000000..50a1ffa --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/approval_public.key @@ -0,0 +1 @@ +pqc3MBXcSfGexJz8uZLMOgzHSCvvYOXlkyqRi970UFk= diff --git a/scenarios/gait/approval-expiry-1s-past/expected.yaml 
b/scenarios/gait/approval-expiry-1s-past/expected.yaml new file mode 100644 index 0000000..d167ac9 --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/expected.yaml @@ -0,0 +1,4 @@ +exit_code: 4 +verdict: require_approval +reason_codes_must_include: + - approval_token_expired diff --git a/scenarios/gait/approval-expiry-1s-past/intent.json b/scenarios/gait/approval-expiry-1s-past/intent.json new file mode 100644 index 0000000..898a98b --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/intent.json @@ -0,0 +1 @@ +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.write","args":{"path":"/tmp/approve-expired.txt"},"targets":[{"kind":"path","value":"/tmp/approve-expired.txt","operation":"write","endpoint_class":"fs.write"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"high","session_id":"sess-tier11"}} diff --git a/scenarios/gait/approval-expiry-1s-past/policy.yaml b/scenarios/gait/approval-expiry-1s-past/policy.yaml new file mode 100644 index 0000000..bf1668e --- /dev/null +++ b/scenarios/gait/approval-expiry-1s-past/policy.yaml @@ -0,0 +1 @@ +default_verdict: require_approval diff --git a/scenarios/gait/approval-token-valid/README.md b/scenarios/gait/approval-token-valid/README.md new file mode 100644 index 0000000..953ede4 --- /dev/null +++ b/scenarios/gait/approval-token-valid/README.md @@ -0,0 +1,3 @@ +# approval-token-valid + +Ensures a valid approval token satisfies require_approval and yields allow. 
diff --git a/scenarios/gait/approval-token-valid/approval-token.json b/scenarios/gait/approval-token-valid/approval-token.json new file mode 100644 index 0000000..b650983 --- /dev/null +++ b/scenarios/gait/approval-token-valid/approval-token.json @@ -0,0 +1,21 @@ +{ + "schema_id": "gait.gate.approval_token", + "schema_version": "1.0.0", + "created_at": "2026-02-18T19:13:05.081373Z", + "producer_version": "0.0.0-dev", + "token_id": "da3a8c4454aaba580ea21586", + "approver_identity": "human.reviewer", + "reason_code": "MANUAL_APPROVAL", + "intent_digest": "0b82a0291b42e868e91897bbeaa6a16bf76694ad21a6c3aa5e8fa5d44b7562b6", + "policy_digest": "07a528a879e4f0e0cd5a0e2ca801924746b47dac624862207ef21d4ec266f8f9", + "scope": [ + "tool:tool.write" + ], + "expires_at": "2036-02-16T19:13:05.081373Z", + "signature": { + "alg": "ed25519", + "key_id": "ff74abb7cb8fcdcfeed002ba39fd6c8b074afaa02e214d626f5677f5a2fcafd2", + "sig": "C2fcpy/QaFHuqpV7fhLInhCwoPBwT1uZ3rvCEUMbAuEz4ueeJPMcsXMVAt53AUrEMWg5+NtCzMRc6iIji5d/Cg==", + "signed_digest": "fc0f39bf6c5ea451f66e1ac97d6ffed0a7f5f67be04cb23f21ae031c3ece6a66" + } +} diff --git a/scenarios/gait/approval-token-valid/approval_public.key b/scenarios/gait/approval-token-valid/approval_public.key new file mode 100644 index 0000000..d30d8a8 --- /dev/null +++ b/scenarios/gait/approval-token-valid/approval_public.key @@ -0,0 +1 @@ +OHLUcz7bHnDiCJOaRaGqVGNeG+j/NYBNd9FNajegulU= diff --git a/scenarios/gait/approval-token-valid/expected.yaml b/scenarios/gait/approval-token-valid/expected.yaml new file mode 100644 index 0000000..00e756c --- /dev/null +++ b/scenarios/gait/approval-token-valid/expected.yaml @@ -0,0 +1,4 @@ +exit_code: 0 +verdict: allow +reason_codes_must_include: + - approval_granted diff --git a/scenarios/gait/approval-token-valid/intent.json b/scenarios/gait/approval-token-valid/intent.json new file mode 100644 index 0000000..7e6aa78 --- /dev/null +++ b/scenarios/gait/approval-token-valid/intent.json @@ -0,0 +1 @@ 
+{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.write","args":{"path":"/tmp/approve-valid.txt"},"targets":[{"kind":"path","value":"/tmp/approve-valid.txt","operation":"write","endpoint_class":"fs.write"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"high","session_id":"sess-tier11"}} diff --git a/scenarios/gait/approval-token-valid/policy.yaml b/scenarios/gait/approval-token-valid/policy.yaml new file mode 100644 index 0000000..bf1668e --- /dev/null +++ b/scenarios/gait/approval-token-valid/policy.yaml @@ -0,0 +1 @@ +default_verdict: require_approval diff --git a/scenarios/gait/concurrent-evaluation-10/README.md b/scenarios/gait/concurrent-evaluation-10/README.md new file mode 100644 index 0000000..73ec060 --- /dev/null +++ b/scenarios/gait/concurrent-evaluation-10/README.md @@ -0,0 +1,3 @@ +# concurrent-evaluation-10 + +Ensures ten concurrent evaluations of the same intent return stable verdicts without errors. 
diff --git a/scenarios/gait/concurrent-evaluation-10/expected.yaml b/scenarios/gait/concurrent-evaluation-10/expected.yaml new file mode 100644 index 0000000..ff32ded --- /dev/null +++ b/scenarios/gait/concurrent-evaluation-10/expected.yaml @@ -0,0 +1,3 @@ +exit_code: 0 +verdict: allow +successful_runs: 10 diff --git a/scenarios/gait/concurrent-evaluation-10/flags.yaml b/scenarios/gait/concurrent-evaluation-10/flags.yaml new file mode 100644 index 0000000..6fb819d --- /dev/null +++ b/scenarios/gait/concurrent-evaluation-10/flags.yaml @@ -0,0 +1 @@ +concurrency: 10 diff --git a/scenarios/gait/concurrent-evaluation-10/intent.json b/scenarios/gait/concurrent-evaluation-10/intent.json new file mode 100644 index 0000000..1961997 --- /dev/null +++ b/scenarios/gait/concurrent-evaluation-10/intent.json @@ -0,0 +1 @@ +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.read","args":{"path":"/tmp/concurrent.txt"},"targets":[{"kind":"path","value":"/tmp/concurrent.txt","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"medium","session_id":"sess-tier11"}} diff --git a/scenarios/gait/concurrent-evaluation-10/policy.yaml b/scenarios/gait/concurrent-evaluation-10/policy.yaml new file mode 100644 index 0000000..67c77e4 --- /dev/null +++ b/scenarios/gait/concurrent-evaluation-10/policy.yaml @@ -0,0 +1 @@ +default_verdict: allow diff --git a/scenarios/gait/delegation-chain-depth-3/README.md b/scenarios/gait/delegation-chain-depth-3/README.md new file mode 100644 index 0000000..c3a86c8 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/README.md @@ -0,0 +1,3 @@ +# delegation-chain-depth-3 + +Validates a three-link delegation chain can authorize a delegated write intent. 
diff --git a/scenarios/gait/delegation-chain-depth-3/delegation-token-1.json b/scenarios/gait/delegation-chain-depth-3/delegation-token-1.json new file mode 100644 index 0000000..079590e --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/delegation-token-1.json @@ -0,0 +1,20 @@ +{ + "schema_id": "gait.gate.delegation_token", + "schema_version": "1.0.0", + "created_at": "2026-02-18T19:13:04.95397Z", + "producer_version": "0.0.0-dev", + "token_id": "694428976ab1b10859171a05", + "delegator_identity": "agent.lead", + "delegate_identity": "agent.manager", + "scope": [ + "tool:tool.write" + ], + "scope_class": "write", + "expires_at": "2036-02-16T19:13:04.95397Z", + "signature": { + "alg": "ed25519", + "key_id": "854baff4855aef78ee7bff54778815baba4f4c8dd693e12ad4429edd118f6590", + "sig": "WffWARts662LDNJ4Nm7YOT6T/iqLxLyBXz9qP/+3SUye/yrtRmjE+hAbd/i0Jep8ARBotfoKuoNOqS/JiiUQAQ==", + "signed_digest": "63ffcb2916228baf7e2a3b4d57f833aeba8a08d114659990e4644b9b13bdd9e0" + } +} diff --git a/scenarios/gait/delegation-chain-depth-3/delegation-token-2.json b/scenarios/gait/delegation-chain-depth-3/delegation-token-2.json new file mode 100644 index 0000000..8b0d678 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/delegation-token-2.json @@ -0,0 +1,20 @@ +{ + "schema_id": "gait.gate.delegation_token", + "schema_version": "1.0.0", + "created_at": "2026-02-18T19:13:04.96196Z", + "producer_version": "0.0.0-dev", + "token_id": "7e37847cf7d9320eb0854cc8", + "delegator_identity": "agent.manager", + "delegate_identity": "agent.senior", + "scope": [ + "tool:tool.write" + ], + "scope_class": "write", + "expires_at": "2036-02-16T19:13:04.96196Z", + "signature": { + "alg": "ed25519", + "key_id": "854baff4855aef78ee7bff54778815baba4f4c8dd693e12ad4429edd118f6590", + "sig": "bd15cvO/JBhJoCKov4WlPAB0A1jitmoIK3ISnBUqJNMq0cB9SzfTb1NlhSmhx9cHNC/vGYinfvzi+Vf1E+jfDw==", + "signed_digest": "3a9e00573bb8527ca7a89813547ec21043a60c77b0c891eb5c817896262344e8" + } +} diff --git 
a/scenarios/gait/delegation-chain-depth-3/delegation-token-3.json b/scenarios/gait/delegation-chain-depth-3/delegation-token-3.json new file mode 100644 index 0000000..0c244f4 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/delegation-token-3.json @@ -0,0 +1,20 @@ +{ + "schema_id": "gait.gate.delegation_token", + "schema_version": "1.0.0", + "created_at": "2026-02-18T19:13:04.968914Z", + "producer_version": "0.0.0-dev", + "token_id": "8a715283a36e6b05e9287e12", + "delegator_identity": "agent.senior", + "delegate_identity": "agent.worker", + "scope": [ + "tool:tool.write" + ], + "scope_class": "write", + "expires_at": "2036-02-16T19:13:04.968914Z", + "signature": { + "alg": "ed25519", + "key_id": "854baff4855aef78ee7bff54778815baba4f4c8dd693e12ad4429edd118f6590", + "sig": "I+s4WhBmAtbPQPMd2jzwb0s7xwkq8n7lxLbKvoJ4rXFhc1cr99qy6ZoWlw1rnD1/4buBaT0YRtCQivSsaDAADg==", + "signed_digest": "d37f2bc6e182a895c1639450d684860bd2fe51025abd470afbf267170fb4baf8" + } +} diff --git a/scenarios/gait/delegation-chain-depth-3/delegation_public.key b/scenarios/gait/delegation-chain-depth-3/delegation_public.key new file mode 100644 index 0000000..cdce620 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/delegation_public.key @@ -0,0 +1 @@ +SKLYKQdooJ4c6INOrN/k1xHsxVL6dkWCcBA5i1XGVAI= diff --git a/scenarios/gait/delegation-chain-depth-3/expected.yaml b/scenarios/gait/delegation-chain-depth-3/expected.yaml new file mode 100644 index 0000000..45c0dd2 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/expected.yaml @@ -0,0 +1,5 @@ +exit_code: 0 +verdict: allow +valid_delegations: 1 +reason_codes_must_include: + - delegation_granted diff --git a/scenarios/gait/delegation-chain-depth-3/flags.yaml b/scenarios/gait/delegation-chain-depth-3/flags.yaml new file mode 100644 index 0000000..f4f0770 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/flags.yaml @@ -0,0 +1,3 @@ +delegation_chain_files: + - delegation-token-2.json + - delegation-token-3.json diff 
--git a/scenarios/gait/delegation-chain-depth-3/intent.json b/scenarios/gait/delegation-chain-depth-3/intent.json new file mode 100644 index 0000000..f39f362 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/intent.json @@ -0,0 +1 @@ +{"schema_id": "gait.gate.intent_request", "schema_version": "1.0.0", "created_at": "2026-02-18T00:00:00Z", "producer_version": "0.0.0-dev", "tool_name": "tool.write", "args": {"path": "/tmp/delegated.txt"}, "targets": [{"kind": "path", "value": "/tmp/delegated.txt", "operation": "write", "endpoint_class": "fs.write"}], "arg_provenance": [{"arg_path": "$.path", "source": "user"}], "delegation": {"requester_identity": "agent.worker", "scope_class": "write", "token_refs": ["694428976ab1b10859171a05", "7e37847cf7d9320eb0854cc8", "8a715283a36e6b05e9287e12"], "chain": [{"delegator_identity": "agent.lead", "delegate_identity": "agent.manager", "scope_class": "write", "token_ref": "694428976ab1b10859171a05"}, {"delegator_identity": "agent.manager", "delegate_identity": "agent.senior", "scope_class": "write", "token_ref": "7e37847cf7d9320eb0854cc8"}, {"delegator_identity": "agent.senior", "delegate_identity": "agent.worker", "scope_class": "write", "token_ref": "8a715283a36e6b05e9287e12"}]}, "context": {"identity": "agent.worker", "workspace": "/repo/gait", "risk_class": "high", "session_id": "sess-tier11"}} \ No newline at end of file diff --git a/scenarios/gait/delegation-chain-depth-3/policy.yaml b/scenarios/gait/delegation-chain-depth-3/policy.yaml new file mode 100644 index 0000000..5f701f9 --- /dev/null +++ b/scenarios/gait/delegation-chain-depth-3/policy.yaml @@ -0,0 +1,11 @@ +default_verdict: block +rules: + - name: allow-delegated-write + effect: allow + match: + tool_names: [tool.write] + require_delegation: true + allowed_delegator_identities: [agent.lead, agent.manager, agent.senior] + allowed_delegate_identities: [agent.manager, agent.senior, agent.worker] + delegation_scopes: [write] + max_delegation_depth: 3 diff --git 
a/scenarios/gait/dry-run-no-side-effects/README.md b/scenarios/gait/dry-run-no-side-effects/README.md new file mode 100644 index 0000000..42bcf5e --- /dev/null +++ b/scenarios/gait/dry-run-no-side-effects/README.md @@ -0,0 +1,3 @@ +# dry-run-no-side-effects + +Ensures simulate mode remains non-enforcing while still returning deterministic gate output. diff --git a/scenarios/gait/dry-run-no-side-effects/expected.yaml b/scenarios/gait/dry-run-no-side-effects/expected.yaml new file mode 100644 index 0000000..a23b38c --- /dev/null +++ b/scenarios/gait/dry-run-no-side-effects/expected.yaml @@ -0,0 +1,5 @@ +exit_code: 0 +verdict: allow +simulate_mode: true +reason_codes: + - simulate_mode_non_enforcing diff --git a/scenarios/gait/dry-run-no-side-effects/flags.yaml b/scenarios/gait/dry-run-no-side-effects/flags.yaml new file mode 100644 index 0000000..f115967 --- /dev/null +++ b/scenarios/gait/dry-run-no-side-effects/flags.yaml @@ -0,0 +1 @@ +simulate: true diff --git a/scenarios/gait/dry-run-no-side-effects/intent.json b/scenarios/gait/dry-run-no-side-effects/intent.json new file mode 100644 index 0000000..106be69 --- /dev/null +++ b/scenarios/gait/dry-run-no-side-effects/intent.json @@ -0,0 +1 @@ +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.write","args":{"path":"/tmp/dry-run.txt"},"targets":[{"kind":"path","value":"/tmp/dry-run.txt","operation":"write","endpoint_class":"fs.write"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"high","session_id":"sess-tier11"}} diff --git a/scenarios/gait/dry-run-no-side-effects/policy.yaml b/scenarios/gait/dry-run-no-side-effects/policy.yaml new file mode 100644 index 0000000..67c77e4 --- /dev/null +++ b/scenarios/gait/dry-run-no-side-effects/policy.yaml @@ -0,0 +1 @@ +default_verdict: allow diff --git 
a/scenarios/gait/pack-integrity-round-trip/README.md b/scenarios/gait/pack-integrity-round-trip/README.md new file mode 100644 index 0000000..76edba5 --- /dev/null +++ b/scenarios/gait/pack-integrity-round-trip/README.md @@ -0,0 +1,3 @@ +# pack-integrity-round-trip + +Builds a run pack from a deterministic demo run and verifies pack integrity end-to-end. diff --git a/scenarios/gait/pack-integrity-round-trip/expected.yaml b/scenarios/gait/pack-integrity-round-trip/expected.yaml new file mode 100644 index 0000000..0a7b56b --- /dev/null +++ b/scenarios/gait/pack-integrity-round-trip/expected.yaml @@ -0,0 +1,3 @@ +exit_code: 0 +verify_signature_status: missing +source_ref: run_demo diff --git a/scenarios/gait/policy-allow-safe-tools/README.md b/scenarios/gait/policy-allow-safe-tools/README.md new file mode 100644 index 0000000..a87f38d --- /dev/null +++ b/scenarios/gait/policy-allow-safe-tools/README.md @@ -0,0 +1,3 @@ +# policy-allow-safe-tools + +Ensures safe tools can be explicitly allowlisted under a block-by-default policy. 
diff --git a/scenarios/gait/policy-allow-safe-tools/expected-verdicts.jsonl b/scenarios/gait/policy-allow-safe-tools/expected-verdicts.jsonl new file mode 100644 index 0000000..8a5a3b6 --- /dev/null +++ b/scenarios/gait/policy-allow-safe-tools/expected-verdicts.jsonl @@ -0,0 +1,3 @@ +{"index":1,"tool_name":"tool.read","verdict":"allow","exit_code":0} +{"index":2,"tool_name":"tool.list","verdict":"allow","exit_code":0} +{"index":3,"tool_name":"tool.info","verdict":"allow","exit_code":0} diff --git a/scenarios/gait/policy-allow-safe-tools/intents.jsonl b/scenarios/gait/policy-allow-safe-tools/intents.jsonl new file mode 100644 index 0000000..54286cf --- /dev/null +++ b/scenarios/gait/policy-allow-safe-tools/intents.jsonl @@ -0,0 +1,3 @@ +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.read","args":{"path":"/tmp/a.txt"},"targets":[{"kind":"path","value":"/tmp/a.txt","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"low","session_id":"sess-tier11"}} +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.list","args":{"path":"/tmp"},"targets":[{"kind":"path","value":"/tmp","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"low","session_id":"sess-tier11"}} 
+{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.info","args":{"path":"/tmp/b.txt"},"targets":[{"kind":"path","value":"/tmp/b.txt","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"low","session_id":"sess-tier11"}} diff --git a/scenarios/gait/policy-allow-safe-tools/policy.yaml b/scenarios/gait/policy-allow-safe-tools/policy.yaml new file mode 100644 index 0000000..3ea9230 --- /dev/null +++ b/scenarios/gait/policy-allow-safe-tools/policy.yaml @@ -0,0 +1,6 @@ +default_verdict: block +rules: + - name: allow-safe-tools + effect: allow + match: + tool_names: [tool.read, tool.list, tool.info] diff --git a/scenarios/gait/policy-block-destructive/README.md b/scenarios/gait/policy-block-destructive/README.md new file mode 100644 index 0000000..c8e43e8 --- /dev/null +++ b/scenarios/gait/policy-block-destructive/README.md @@ -0,0 +1,3 @@ +# policy-block-destructive + +Ensures safe read/list/info intents are allowed while destructive intents are blocked. 
diff --git a/scenarios/gait/policy-block-destructive/expected-verdicts.jsonl b/scenarios/gait/policy-block-destructive/expected-verdicts.jsonl new file mode 100644 index 0000000..1305823 --- /dev/null +++ b/scenarios/gait/policy-block-destructive/expected-verdicts.jsonl @@ -0,0 +1,5 @@ +{"index":1,"tool_name":"tool.read","verdict":"allow","exit_code":0} +{"index":2,"tool_name":"tool.list","verdict":"allow","exit_code":0} +{"index":3,"tool_name":"tool.info","verdict":"allow","exit_code":0} +{"index":4,"tool_name":"tool.delete","verdict":"block","exit_code":3} +{"index":5,"tool_name":"tool.exec","verdict":"block","exit_code":3} diff --git a/scenarios/gait/policy-block-destructive/intents.jsonl b/scenarios/gait/policy-block-destructive/intents.jsonl new file mode 100644 index 0000000..726c57a --- /dev/null +++ b/scenarios/gait/policy-block-destructive/intents.jsonl @@ -0,0 +1,5 @@ +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.read","args":{"path":"/tmp/input.txt"},"targets":[{"kind":"path","value":"/tmp/input.txt","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"medium","session_id":"sess-tier11"}} +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.list","args":{"path":"/tmp"},"targets":[{"kind":"path","value":"/tmp","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"medium","session_id":"sess-tier11"}} 
+{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.info","args":{"path":"/tmp/info.txt"},"targets":[{"kind":"path","value":"/tmp/info.txt","operation":"read","endpoint_class":"fs.read"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"medium","session_id":"sess-tier11"}} +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.delete","args":{"path":"/tmp/unsafe.txt"},"targets":[{"kind":"path","value":"/tmp/unsafe.txt","operation":"delete","endpoint_class":"fs.delete"}],"arg_provenance":[{"arg_path":"$.path","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"high","session_id":"sess-tier11"}} +{"schema_id":"gait.gate.intent_request","schema_version":"1.0.0","created_at":"2026-02-18T00:00:00Z","producer_version":"0.0.0-dev","tool_name":"tool.exec","args":{"cmd":"rm -rf /tmp/unsafe"},"targets":[{"kind":"other","value":"rm -rf /tmp/unsafe","operation":"exec","endpoint_class":"proc.exec"}],"arg_provenance":[{"arg_path":"$.cmd","source":"user"}],"context":{"identity":"agent.scenario","workspace":"/repo/gait","risk_class":"high","session_id":"sess-tier11"}} diff --git a/scenarios/gait/policy-block-destructive/policy.yaml b/scenarios/gait/policy-block-destructive/policy.yaml new file mode 100644 index 0000000..dcd9a5e --- /dev/null +++ b/scenarios/gait/policy-block-destructive/policy.yaml @@ -0,0 +1,10 @@ +default_verdict: block +rules: + - name: allow-safe-tools + effect: allow + match: + tool_names: [tool.read, tool.list, tool.info] + - name: block-destructive-tools + effect: block + match: + tool_names: [tool.delete, tool.exec] diff --git a/scripts/run_scenarios.sh b/scripts/run_scenarios.sh new file mode 100755 index 0000000..103c586 --- /dev/null 
+++ b/scripts/run_scenarios.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Builds the gait binary and runs the Tier 11 scenario suite for one product.
+# Usage: bash scripts/run_scenarios.sh <product>   (only "gait" is supported)
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+PRODUCT="${1:-}"
+
+if [[ -z "$PRODUCT" ]]; then
+  echo "usage: bash scripts/run_scenarios.sh <product>" >&2
+  exit 1
+fi
+if [[ "$PRODUCT" != "gait" ]]; then
+  echo "unsupported product: $PRODUCT (expected: gait)" >&2
+  exit 1
+fi
+
+bash "$SCRIPT_DIR/validate_scenarios.sh"
+
+BIN_DIR="$(mktemp -d)"
+BIN_PATH="$BIN_DIR/gait"
+OUT_FILE="$(mktemp)"
+
+cleanup() {
+  rm -f "$OUT_FILE"
+  rm -rf "$BIN_DIR"
+}
+trap cleanup EXIT
+
+(
+  cd "$REPO_ROOT"
+  go build -o "$BIN_PATH" ./cmd/gait
+)
+
+set +e
+(
+  cd "$REPO_ROOT"
+  GAIT_SCENARIO_BIN="$BIN_PATH" go test ./internal/scenarios -count=1 -tags=scenario -v | tee "$OUT_FILE"
+)
+# The subshell's status is the pipeline's status; pipefail (inherited from
+# above) makes that go test's failure rather than tee's success.
+status=${PIPESTATUS[0]}
+set -e
+
+# Count passing subtests so a run with fewer passes than expected still fails.
+pass_count=$(grep -c -- '--- PASS: TestTier11Scenarios/' "$OUT_FILE" || true)
+expected_count=8
+if [[ "$status" -ne 0 ]]; then
+  echo "scenario test execution failed for $PRODUCT" >&2
+  exit "$status"
+fi
+if [[ "$pass_count" -ne "$expected_count" ]]; then
+  echo "scenario pass-count mismatch: got=$pass_count expected=$expected_count" >&2
+  exit 1
+fi
+
+echo "$PRODUCT scenarios: $pass_count/$expected_count passed"
diff --git a/scripts/validate_scenarios.sh b/scripts/validate_scenarios.sh
new file mode 100755
index 0000000..e215436
--- /dev/null
+++ b/scripts/validate_scenarios.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Checks that every required gait scenario directory exists, then runs the Go
+# fixture validator (TestValidateScenarioFixtures).
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+SCENARIO_ROOT="$REPO_ROOT/scenarios/gait"
+
+required_scenarios=(
+  "approval-expiry-1s-past"
+  "approval-token-valid"
+  "concurrent-evaluation-10"
+  "delegation-chain-depth-3"
+  "dry-run-no-side-effects"
+  "pack-integrity-round-trip"
+  "policy-allow-safe-tools"
+  "policy-block-destructive"
+)
+
+if [[ ! -d "$SCENARIO_ROOT" ]]; then
+  echo "scenario root missing: $SCENARIO_ROOT" >&2
+  exit 1
+fi
+
+for scenario in "${required_scenarios[@]}"; do
+  if [[ ! -d "$SCENARIO_ROOT/$scenario" ]]; then
+    echo "missing scenario directory: $SCENARIO_ROOT/$scenario" >&2
+    exit 1
+  fi
+done
+
+(
+  cd "$REPO_ROOT"
+  go test ./internal/scenarios -run '^TestValidateScenarioFixtures$' -count=1
+)
+
+echo "validated ${#required_scenarios[@]} scenarios in $SCENARIO_ROOT"