Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions internal/scenarios/fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@ import (
// scenarioRootRelativePath is the slash-separated directory that holds the
// gait scenario fixtures.
// NOTE(review): presumably resolved against the repository root located by
// findRepoRoot — confirm at the call sites.
const scenarioRootRelativePath = "scenarios/gait"

// requiredScenarioMinimumFiles maps each scenario directory name to the
// fixture file names listed as the minimum set for that scenario.
//
// Every scenario appears exactly once: Go rejects duplicate constant keys in
// a map literal at compile time, so the earlier, differently-aligned copies of
// the first eight entries must not be kept alongside the current ones.
//
// NOTE(review): the code that enforces this list is not shown here;
// presumably a fixture validation step checks these names under
// scenarioRootRelativePath — confirm.
var requiredScenarioMinimumFiles = map[string][]string{
	"policy-block-destructive":                       {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"},
	"policy-allow-safe-tools":                        {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"},
	"dry-run-no-side-effects":                        {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"concurrent-evaluation-10":                       {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"pack-integrity-round-trip":                      {"README.md", "expected.yaml"},
	"delegation-chain-depth-3":                       {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "delegation-token-1.json", "delegation-token-2.json", "delegation-token-3.json", "delegation_public.key"},
	"approval-expiry-1s-past":                        {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"},
	"approval-token-valid":                           {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"},
	"script-threshold-approval-determinism":          {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"script-max-steps-exceeded":                      {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"script-mixed-risk-block":                        {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"wrkr-missing-fail-closed-high-risk":             {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"},
	"approved-registry-signature-mismatch-high-risk": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "approved_scripts_tampered.json", "approval_public.key"},
}

func findRepoRoot(startDir string) (string, error) {
Expand Down
152 changes: 148 additions & 4 deletions internal/scenarios/scenario_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"os/exec"
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
Expand All @@ -22,6 +23,10 @@ type scenarioFlags struct {
Simulate bool `yaml:"simulate"`
Concurrency int `yaml:"concurrency"`
DelegationChainFiles []string `yaml:"delegation_chain_files"`
Repeat int `yaml:"repeat"`
WrkrInventory string `yaml:"wrkr_inventory"`
ApprovedRegistry string `yaml:"approved_script_registry"`
ApprovedPublicKey string `yaml:"approved_script_public_key"`
}

type expectedYAML struct {
Expand All @@ -34,6 +39,15 @@ type expectedYAML struct {
ValidDelegations int `yaml:"valid_delegations"`
ReasonCodes []string `yaml:"reason_codes"`
ReasonCodesMustInclude []string `yaml:"reason_codes_must_include"`
Script *bool `yaml:"script"`
StepCount int `yaml:"step_count"`
StepVerdictCount int `yaml:"step_verdict_count"`
CompositeRiskClass string `yaml:"composite_risk_class"`
ContextSource string `yaml:"context_source"`
PreApproved *bool `yaml:"pre_approved"`
RegistryReason string `yaml:"registry_reason"`
OK *bool `yaml:"ok"`
ErrorContains string `yaml:"error_contains"`
}

type expectedVerdictLine struct {
Expand All @@ -44,10 +58,30 @@ type expectedVerdictLine struct {
}

type gateEvalOutput struct {
Verdict string `json:"verdict"`
ReasonCodes []string `json:"reason_codes"`
SimulateMode bool `json:"simulate_mode"`
ValidDelegations int `json:"valid_delegations"`
OK bool `json:"ok"`
Verdict string `json:"verdict"`
ReasonCodes []string `json:"reason_codes"`
SimulateMode bool `json:"simulate_mode"`
ValidDelegations int `json:"valid_delegations"`
Script bool `json:"script"`
StepCount int `json:"step_count"`
ScriptHash string `json:"script_hash"`
CompositeRiskClass string `json:"composite_risk_class"`
StepVerdicts []stepVerdict `json:"step_verdicts"`
ContextSource string `json:"context_source"`
PreApproved bool `json:"pre_approved"`
PatternID string `json:"pattern_id"`
RegistryReason string `json:"registry_reason"`
Error string `json:"error"`
}

// stepVerdict is one element of the "step_verdicts" array decoded from gate
// eval JSON output (see gateEvalOutput.StepVerdicts). The harness counts these
// entries and compares them across repeated runs with reflect.DeepEqual.
// NOTE(review): per-field meanings are inferred from the JSON keys only;
// confirm against the gate output schema before relying on them.
type stepVerdict struct {
Index int `json:"index"`
ToolName string `json:"tool_name"`
Verdict string `json:"verdict"`
ReasonCodes []string `json:"reason_codes"`
Violations []string `json:"violations"`
MatchedRule string `json:"matched_rule"`
}

type packVerifyOutput struct {
Expand Down Expand Up @@ -99,6 +133,12 @@ func runScenario(t *testing.T, repoRoot string, binaryPath string, name string,
runDelegationScenario(t, repoRoot, binaryPath, scenarioPath)
case "approval-expiry-1s-past", "approval-token-valid":
runApprovalScenario(t, repoRoot, binaryPath, scenarioPath)
case "script-threshold-approval-determinism",
"script-max-steps-exceeded",
"script-mixed-risk-block",
"wrkr-missing-fail-closed-high-risk",
"approved-registry-signature-mismatch-high-risk":
runScriptGovernanceScenario(t, repoRoot, binaryPath, scenarioPath)
default:
t.Fatalf("unsupported scenario: %s", name)
}
Expand Down Expand Up @@ -342,6 +382,110 @@ func runApprovalScenario(t *testing.T, repoRoot string, binaryPath string, scena
}
}

// runScriptGovernanceScenario runs `gate eval --json` against the policy and
// intent of a script-governance fixture and checks the decoded result against
// the fixture's expected.yaml.
//
// flags.yaml may name optional fixture files (Wrkr inventory, approved-script
// registry, approved-script public key); each non-blank entry is passed to the
// command as a path inside scenarioPath. The command is executed flags.Repeat
// times, or once when that value is zero or negative. Every run must match the
// expected exit code and field expectations. When more than one run is
// requested, the script hash, verdict, reason codes, and step verdicts of each
// later run must also equal those of the first run.
func runScriptGovernanceScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) {
	expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml"))
	flags := readScenarioFlags(t, filepath.Join(scenarioPath, "flags.yaml"))

	runCount := 1
	if flags.Repeat > 0 {
		runCount = flags.Repeat
	}

	// Optional inputs, listed in the order their flags are appended.
	optionalInputs := []struct {
		flag string
		file string
	}{
		{"--wrkr-inventory", flags.WrkrInventory},
		{"--approved-script-registry", flags.ApprovedRegistry},
		{"--approved-script-public-key", flags.ApprovedPublicKey},
	}

	var first *gateEvalOutput
	for run := 0; run < runCount; run++ {
		cmdArgs := []string{
			"gate", "eval",
			"--policy", filepath.Join(scenarioPath, "policy.yaml"),
			"--intent", filepath.Join(scenarioPath, "intent.json"),
			"--json",
		}
		for _, input := range optionalInputs {
			if strings.TrimSpace(input.file) == "" {
				continue
			}
			cmdArgs = append(cmdArgs, input.flag, filepath.Join(scenarioPath, input.file))
		}

		// Each run gets its own temporary working directory.
		raw, exitCode := mustRunCommand(t, t.TempDir(), binaryPath, cmdArgs...)
		if exitCode != expected.ExitCode {
			t.Fatalf("script governance exit mismatch: got=%d want=%d output=%s", exitCode, expected.ExitCode, raw)
		}

		var result gateEvalOutput
		if err := json.Unmarshal([]byte(raw), &result); err != nil {
			t.Fatalf("parse script governance output: %v output=%s", err, raw)
		}
		assertScriptGovernanceOutput(t, expected, result, raw)

		// Cross-run comparison only applies when repeats were requested.
		if runCount <= 1 {
			continue
		}
		if first == nil {
			snapshot := result
			first = &snapshot
			continue
		}

		// t.Fatalf stops the test, so only the first differing field is reported.
		switch {
		case result.ScriptHash != first.ScriptHash:
			t.Fatalf("non-deterministic script_hash across runs: first=%s next=%s", first.ScriptHash, result.ScriptHash)
		case result.Verdict != first.Verdict:
			t.Fatalf("non-deterministic verdict across runs: first=%s next=%s", first.Verdict, result.Verdict)
		case !reflect.DeepEqual(result.ReasonCodes, first.ReasonCodes):
			t.Fatalf("non-deterministic reason_codes across runs: first=%v next=%v", first.ReasonCodes, result.ReasonCodes)
		case !reflect.DeepEqual(result.StepVerdicts, first.StepVerdicts):
			t.Fatalf("non-deterministic step_verdicts across runs: first=%v next=%v", first.StepVerdicts, result.StepVerdicts)
		}
	}
}

// assertScriptGovernanceOutput stops the test at the first field of got that
// disagrees with expected; raw is the undecoded command output echoed in every
// failure message.
//
// An expectation is enforced only when it is set: pointer fields when non-nil,
// strings when non-empty, counts when greater than zero. Each entry of
// expected.ReasonCodes and expected.ReasonCodesMustInclude must be present in
// got.ReasonCodes; additional reason codes in got are tolerated.
func assertScriptGovernanceOutput(t *testing.T, expected expectedYAML, got gateEvalOutput, raw string) {
	t.Helper()

	if want := expected.OK; want != nil && *want != got.OK {
		t.Fatalf("unexpected ok field: got=%v want=%v output=%s", got.OK, *want, raw)
	}
	if want := expected.Verdict; want != "" && want != got.Verdict {
		t.Fatalf("unexpected verdict: got=%s want=%s output=%s", got.Verdict, want, raw)
	}
	if want := expected.Script; want != nil && *want != got.Script {
		t.Fatalf("unexpected script flag: got=%v want=%v output=%s", got.Script, *want, raw)
	}
	if want := expected.StepCount; want > 0 && want != got.StepCount {
		t.Fatalf("unexpected step_count: got=%d want=%d output=%s", got.StepCount, want, raw)
	}
	if have, want := len(got.StepVerdicts), expected.StepVerdictCount; want > 0 && have != want {
		t.Fatalf("unexpected step_verdict_count: got=%d want=%d output=%s", have, want, raw)
	}
	if want := expected.CompositeRiskClass; want != "" && want != got.CompositeRiskClass {
		t.Fatalf("unexpected composite_risk_class: got=%s want=%s output=%s", got.CompositeRiskClass, want, raw)
	}
	if want := expected.ContextSource; want != "" && want != got.ContextSource {
		t.Fatalf("unexpected context_source: got=%s want=%s output=%s", got.ContextSource, want, raw)
	}
	if want := expected.PreApproved; want != nil && *want != got.PreApproved {
		t.Fatalf("unexpected pre_approved: got=%v want=%v output=%s", got.PreApproved, *want, raw)
	}
	if want := expected.RegistryReason; want != "" && want != got.RegistryReason {
		t.Fatalf("unexpected registry_reason: got=%s want=%s output=%s", got.RegistryReason, want, raw)
	}
	if fragment := expected.ErrorContains; fragment != "" && !strings.Contains(got.Error, fragment) {
		t.Fatalf("missing expected error substring %q in %q output=%s", fragment, got.Error, raw)
	}

	// Both lists are treated as "must include" sets, checked in this order.
	for _, requiredCodes := range [][]string{expected.ReasonCodes, expected.ReasonCodesMustInclude} {
		for _, code := range requiredCodes {
			if contains(got.ReasonCodes, code) {
				continue
			}
			t.Fatalf("missing required reason code %q in %v output=%s", code, got.ReasonCodes, raw)
		}
	}
}

func buildGaitBinary(t *testing.T, repoRoot string) string {
t.Helper()
if prebuilt := strings.TrimSpace(os.Getenv("GAIT_SCENARIO_BIN")); prebuilt != "" {
Expand Down
7 changes: 7 additions & 0 deletions scenarios/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Scenario Changelog

## 2026-02-20

- Expanded `scenarios/gait/` from 8 to 13 fixtures.
- Added script-governance fixtures for threshold approvals, max-step blocking, and mixed-risk blocking.
- Added fail-closed fixtures for missing Wrkr inventory and approved-script registry signature mismatch in high-risk contexts.
- Extended scenario harness assertions for script metadata fields, deterministic repeat checks, and fail-closed error expectations.

## 2026-02-18

- Added Tier 11 `scenarios/gait/` fixture corpus with eight deterministic scenarios.
Expand Down
5 changes: 5 additions & 0 deletions scenarios/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ All fixtures are offline-first and deterministic.
6. `delegation-chain-depth-3`
7. `approval-expiry-1s-past`
8. `approval-token-valid`
9. `script-threshold-approval-determinism`
10. `script-max-steps-exceeded`
11. `script-mixed-risk-block`
12. `wrkr-missing-fail-closed-high-risk`
13. `approved-registry-signature-mismatch-high-risk`
7 changes: 7 additions & 0 deletions scenarios/gait/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,10 @@ Each scenario directory contains:
- input artifacts (`policy.yaml`, `intent.json`, `intents.jsonl`, token files, etc.)
- expected artifacts (`expected.yaml` or `expected-verdicts.jsonl`)
- optional `flags.yaml` for execution options

The suite includes baseline policy/pack/delegation/approval scenarios plus script-governance fixtures for:

- script threshold approvals and deterministic script metadata
- script max-step and mixed-risk policy controls
- Wrkr inventory fail-closed behavior in high-risk contexts
- approved-script registry signature mismatch fail-closed behavior
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# approved-registry-signature-mismatch-high-risk

Validates fail-closed blocking when approved-script registry signature verification fails for a high-risk intent context.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2452IFcJJprSJUSaASiKpWRN4bzs7KbckliBk2cKGeg=
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"entries": [
{
"schema_id": "gait.gate.approved_script_entry",
"schema_version": "1.0.0",
"created_at": "2026-02-20T02:31:28.283948Z",
"producer_version": "0.0.0-dev",
"pattern_id": "pattern_a0dce61c1eb7",
"policy_digest": "44ae19688443915c381be9f806b7fc9ddb2736d286c4e40464b7575a874e9420",
"script_hash": "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
"tool_sequence": [
"tool.read",
"tool.write"
],
"approver_identity": "secops",
"expires_at": "2026-02-27T02:31:28.283948Z",
"signature": {
"alg": "ed25519",
"key_id": "8393424682c702d59a923d26924b2d4827711e6c585117077766645e22efeb52",
"sig": "uS11HrmDO+D1b/IaSosCRfEmolp5FTP2J6fWsMHg/Nvn1TAVaT24VUJdnJrfbcZTbUmW0WqQ4+FnoalnYMpVCg==",
"signed_digest": "1e19eb1f8f11a450139cbe01ad9081be99a455c51157ed5f9793e08dc613dce1"
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
exit_code: 3
ok: false
error_contains: approved script registry verification failed at entry 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
repeat: 1
approved_script_registry: approved_scripts_tampered.json
approved_script_public_key: approval_public.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"schema_id": "gait.gate.intent_request",
"schema_version": "1.0.0",
"created_at": "2026-02-20T00:00:00Z",
"producer_version": "scenario-approved-registry-mismatch",
"tool_name": "script",
"args": {},
"targets": [],
"context": {
"identity": "alice",
"workspace": "/repo/gait",
"risk_class": "high"
},
"script": {
"steps": [
{
"tool_name": "tool.read",
"args": {
"path": "/tmp/input.txt"
},
"targets": [
{
"kind": "path",
"value": "/tmp/input.txt",
"operation": "read",
"endpoint_class": "fs.read"
}
]
},
{
"tool_name": "tool.write",
"args": {
"path": "/tmp/output.txt"
},
"targets": [
{
"kind": "path",
"value": "/tmp/output.txt",
"operation": "write",
"endpoint_class": "fs.write"
}
]
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
schema_id: gait.gate.policy
schema_version: 1.0.0
default_verdict: allow
rules:
- name: require_write_approval
priority: 10
effect: require_approval
match:
tool_names: [tool.write]
reason_codes: [approval_required_write]
3 changes: 3 additions & 0 deletions scenarios/gait/script-max-steps-exceeded/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# script-max-steps-exceeded

Validates that a script is blocked (fail-closed) when `scripts.max_steps` is exceeded.
9 changes: 9 additions & 0 deletions scenarios/gait/script-max-steps-exceeded/expected.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
exit_code: 3
ok: true
verdict: block
script: true
step_count: 2
step_verdict_count: 2
composite_risk_class: medium
reason_codes_must_include:
- script_max_steps_exceeded
1 change: 1 addition & 0 deletions scenarios/gait/script-max-steps-exceeded/flags.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
repeat: 1
Loading
Loading