diff --git a/internal/scenarios/fixtures.go b/internal/scenarios/fixtures.go index 4e38936..4fb27c4 100644 --- a/internal/scenarios/fixtures.go +++ b/internal/scenarios/fixtures.go @@ -9,14 +9,19 @@ import ( const scenarioRootRelativePath = "scenarios/gait" var requiredScenarioMinimumFiles = map[string][]string{ - "policy-block-destructive": {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"}, - "policy-allow-safe-tools": {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"}, - "dry-run-no-side-effects": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, - "concurrent-evaluation-10": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, - "pack-integrity-round-trip": {"README.md", "expected.yaml"}, - "delegation-chain-depth-3": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "delegation-token-1.json", "delegation-token-2.json", "delegation-token-3.json", "delegation_public.key"}, - "approval-expiry-1s-past": {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"}, - "approval-token-valid": {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"}, + "policy-block-destructive": {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"}, + "policy-allow-safe-tools": {"README.md", "policy.yaml", "intents.jsonl", "expected-verdicts.jsonl"}, + "dry-run-no-side-effects": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "concurrent-evaluation-10": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "pack-integrity-round-trip": {"README.md", "expected.yaml"}, + "delegation-chain-depth-3": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "delegation-token-1.json", "delegation-token-2.json", "delegation-token-3.json", "delegation_public.key"}, + "approval-expiry-1s-past": 
{"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"}, + "approval-token-valid": {"README.md", "policy.yaml", "intent.json", "expected.yaml", "approval-token.json", "approval_public.key"}, + "script-threshold-approval-determinism": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "script-max-steps-exceeded": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "script-mixed-risk-block": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "wrkr-missing-fail-closed-high-risk": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml"}, + "approved-registry-signature-mismatch-high-risk": {"README.md", "policy.yaml", "intent.json", "flags.yaml", "expected.yaml", "approved_scripts_tampered.json", "approval_public.key"}, } func findRepoRoot(startDir string) (string, error) { diff --git a/internal/scenarios/scenario_test.go b/internal/scenarios/scenario_test.go index 99ed6f3..1ecaf55 100644 --- a/internal/scenarios/scenario_test.go +++ b/internal/scenarios/scenario_test.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "reflect" "sort" "strings" "sync" @@ -22,6 +23,10 @@ type scenarioFlags struct { Simulate bool `yaml:"simulate"` Concurrency int `yaml:"concurrency"` DelegationChainFiles []string `yaml:"delegation_chain_files"` + Repeat int `yaml:"repeat"` + WrkrInventory string `yaml:"wrkr_inventory"` + ApprovedRegistry string `yaml:"approved_script_registry"` + ApprovedPublicKey string `yaml:"approved_script_public_key"` } type expectedYAML struct { @@ -34,6 +39,15 @@ type expectedYAML struct { ValidDelegations int `yaml:"valid_delegations"` ReasonCodes []string `yaml:"reason_codes"` ReasonCodesMustInclude []string `yaml:"reason_codes_must_include"` + Script *bool `yaml:"script"` + StepCount int `yaml:"step_count"` + StepVerdictCount int `yaml:"step_verdict_count"` + CompositeRiskClass string 
`yaml:"composite_risk_class"` + ContextSource string `yaml:"context_source"` + PreApproved *bool `yaml:"pre_approved"` + RegistryReason string `yaml:"registry_reason"` + OK *bool `yaml:"ok"` + ErrorContains string `yaml:"error_contains"` } type expectedVerdictLine struct { @@ -44,10 +58,30 @@ type expectedVerdictLine struct { } type gateEvalOutput struct { - Verdict string `json:"verdict"` - ReasonCodes []string `json:"reason_codes"` - SimulateMode bool `json:"simulate_mode"` - ValidDelegations int `json:"valid_delegations"` + OK bool `json:"ok"` + Verdict string `json:"verdict"` + ReasonCodes []string `json:"reason_codes"` + SimulateMode bool `json:"simulate_mode"` + ValidDelegations int `json:"valid_delegations"` + Script bool `json:"script"` + StepCount int `json:"step_count"` + ScriptHash string `json:"script_hash"` + CompositeRiskClass string `json:"composite_risk_class"` + StepVerdicts []stepVerdict `json:"step_verdicts"` + ContextSource string `json:"context_source"` + PreApproved bool `json:"pre_approved"` + PatternID string `json:"pattern_id"` + RegistryReason string `json:"registry_reason"` + Error string `json:"error"` +} + +type stepVerdict struct { + Index int `json:"index"` + ToolName string `json:"tool_name"` + Verdict string `json:"verdict"` + ReasonCodes []string `json:"reason_codes"` + Violations []string `json:"violations"` + MatchedRule string `json:"matched_rule"` } type packVerifyOutput struct { @@ -99,6 +133,12 @@ func runScenario(t *testing.T, repoRoot string, binaryPath string, name string, runDelegationScenario(t, repoRoot, binaryPath, scenarioPath) case "approval-expiry-1s-past", "approval-token-valid": runApprovalScenario(t, repoRoot, binaryPath, scenarioPath) + case "script-threshold-approval-determinism", + "script-max-steps-exceeded", + "script-mixed-risk-block", + "wrkr-missing-fail-closed-high-risk", + "approved-registry-signature-mismatch-high-risk": + runScriptGovernanceScenario(t, repoRoot, binaryPath, scenarioPath) default: 
t.Fatalf("unsupported scenario: %s", name) } @@ -342,6 +382,110 @@ func runApprovalScenario(t *testing.T, repoRoot string, binaryPath string, scena } } +func runScriptGovernanceScenario(t *testing.T, repoRoot string, binaryPath string, scenarioPath string) { + expected := readExpectedYAML(t, filepath.Join(scenarioPath, "expected.yaml")) + flags := readScenarioFlags(t, filepath.Join(scenarioPath, "flags.yaml")) + repeats := flags.Repeat + if repeats <= 0 { + repeats = 1 + } + + var baseline *gateEvalOutput + for attempt := 0; attempt < repeats; attempt++ { + args := []string{ + "gate", "eval", + "--policy", filepath.Join(scenarioPath, "policy.yaml"), + "--intent", filepath.Join(scenarioPath, "intent.json"), + "--json", + } + if strings.TrimSpace(flags.WrkrInventory) != "" { + args = append(args, "--wrkr-inventory", filepath.Join(scenarioPath, flags.WrkrInventory)) + } + if strings.TrimSpace(flags.ApprovedRegistry) != "" { + args = append(args, "--approved-script-registry", filepath.Join(scenarioPath, flags.ApprovedRegistry)) + } + if strings.TrimSpace(flags.ApprovedPublicKey) != "" { + args = append(args, "--approved-script-public-key", filepath.Join(scenarioPath, flags.ApprovedPublicKey)) + } + + output, code := mustRunCommand(t, t.TempDir(), binaryPath, args...) 
+ if code != expected.ExitCode { + t.Fatalf("script governance exit mismatch: got=%d want=%d output=%s", code, expected.ExitCode, output) + } + + var got gateEvalOutput + if err := json.Unmarshal([]byte(output), &got); err != nil { + t.Fatalf("parse script governance output: %v output=%s", err, output) + } + assertScriptGovernanceOutput(t, expected, got, output) + + if repeats > 1 { + current := got + if baseline == nil { + baseline = &current + } else { + if current.ScriptHash != baseline.ScriptHash { + t.Fatalf("non-deterministic script_hash across runs: first=%s next=%s", baseline.ScriptHash, current.ScriptHash) + } + if current.Verdict != baseline.Verdict { + t.Fatalf("non-deterministic verdict across runs: first=%s next=%s", baseline.Verdict, current.Verdict) + } + if !reflect.DeepEqual(current.ReasonCodes, baseline.ReasonCodes) { + t.Fatalf("non-deterministic reason_codes across runs: first=%v next=%v", baseline.ReasonCodes, current.ReasonCodes) + } + if !reflect.DeepEqual(current.StepVerdicts, baseline.StepVerdicts) { + t.Fatalf("non-deterministic step_verdicts across runs: first=%v next=%v", baseline.StepVerdicts, current.StepVerdicts) + } + } + } + } +} + +func assertScriptGovernanceOutput(t *testing.T, expected expectedYAML, got gateEvalOutput, raw string) { + t.Helper() + if expected.OK != nil && got.OK != *expected.OK { + t.Fatalf("unexpected ok field: got=%v want=%v output=%s", got.OK, *expected.OK, raw) + } + if expected.Verdict != "" && got.Verdict != expected.Verdict { + t.Fatalf("unexpected verdict: got=%s want=%s output=%s", got.Verdict, expected.Verdict, raw) + } + if expected.Script != nil && got.Script != *expected.Script { + t.Fatalf("unexpected script flag: got=%v want=%v output=%s", got.Script, *expected.Script, raw) + } + if expected.StepCount > 0 && got.StepCount != expected.StepCount { + t.Fatalf("unexpected step_count: got=%d want=%d output=%s", got.StepCount, expected.StepCount, raw) + } + if expected.StepVerdictCount > 0 && 
len(got.StepVerdicts) != expected.StepVerdictCount { + t.Fatalf("unexpected step_verdict_count: got=%d want=%d output=%s", len(got.StepVerdicts), expected.StepVerdictCount, raw) + } + if expected.CompositeRiskClass != "" && got.CompositeRiskClass != expected.CompositeRiskClass { + t.Fatalf("unexpected composite_risk_class: got=%s want=%s output=%s", got.CompositeRiskClass, expected.CompositeRiskClass, raw) + } + if expected.ContextSource != "" && got.ContextSource != expected.ContextSource { + t.Fatalf("unexpected context_source: got=%s want=%s output=%s", got.ContextSource, expected.ContextSource, raw) + } + if expected.PreApproved != nil && got.PreApproved != *expected.PreApproved { + t.Fatalf("unexpected pre_approved: got=%v want=%v output=%s", got.PreApproved, *expected.PreApproved, raw) + } + if expected.RegistryReason != "" && got.RegistryReason != expected.RegistryReason { + t.Fatalf("unexpected registry_reason: got=%s want=%s output=%s", got.RegistryReason, expected.RegistryReason, raw) + } + if expected.ErrorContains != "" && !strings.Contains(got.Error, expected.ErrorContains) { + t.Fatalf("missing expected error substring %q in %q output=%s", expected.ErrorContains, got.Error, raw) + } + + for _, required := range expected.ReasonCodes { + if !contains(got.ReasonCodes, required) { + t.Fatalf("missing required reason code %q in %v output=%s", required, got.ReasonCodes, raw) + } + } + for _, required := range expected.ReasonCodesMustInclude { + if !contains(got.ReasonCodes, required) { + t.Fatalf("missing required reason code %q in %v output=%s", required, got.ReasonCodes, raw) + } + } +} + func buildGaitBinary(t *testing.T, repoRoot string) string { t.Helper() if prebuilt := strings.TrimSpace(os.Getenv("GAIT_SCENARIO_BIN")); prebuilt != "" { diff --git a/scenarios/CHANGELOG.md b/scenarios/CHANGELOG.md index 43cc9dc..8884966 100644 --- a/scenarios/CHANGELOG.md +++ b/scenarios/CHANGELOG.md @@ -1,5 +1,12 @@ # Scenario Changelog +## 2026-02-20 + +- Expanded 
`scenarios/gait/` from 8 to 13 fixtures. +- Added script-governance fixtures for threshold approvals, max-step blocking, and mixed-risk blocking. +- Added fail-closed fixtures for missing Wrkr inventory and approved-script registry signature mismatch in high-risk contexts. +- Extended scenario harness assertions for script metadata fields, deterministic repeat checks, and fail-closed error expectations. + ## 2026-02-18 - Added Tier 11 `scenarios/gait/` fixture corpus with eight deterministic scenarios. diff --git a/scenarios/README.md b/scenarios/README.md index ac22631..237a9d6 100644 --- a/scenarios/README.md +++ b/scenarios/README.md @@ -25,3 +25,8 @@ All fixtures are offline-first and deterministic. 6. `delegation-chain-depth-3` 7. `approval-expiry-1s-past` 8. `approval-token-valid` +9. `script-threshold-approval-determinism` +10. `script-max-steps-exceeded` +11. `script-mixed-risk-block` +12. `wrkr-missing-fail-closed-high-risk` +13. `approved-registry-signature-mismatch-high-risk` diff --git a/scenarios/gait/README.md b/scenarios/gait/README.md index 291d2dc..25eb18e 100644 --- a/scenarios/gait/README.md +++ b/scenarios/gait/README.md @@ -8,3 +8,10 @@ Each scenario directory contains: - input artifacts (`policy.yaml`, `intent.json`, `intents.jsonl`, token files, etc.) 
- expected artifacts (`expected.yaml` or `expected-verdicts.jsonl`) - optional `flags.yaml` for execution options + +The suite includes baseline policy/pack/delegation/approval scenarios plus script-governance fixtures for: + +- script threshold approvals and deterministic script metadata +- script max-step and mixed-risk policy controls +- Wrkr inventory fail-closed behavior in high-risk contexts +- approved-script registry signature mismatch fail-closed behavior diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/README.md b/scenarios/gait/approved-registry-signature-mismatch-high-risk/README.md new file mode 100644 index 0000000..9caff21 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/README.md @@ -0,0 +1,3 @@ +# approved-registry-signature-mismatch-high-risk + +Validates fail-closed blocking when approved-script registry signature verification fails for high-risk intent context. diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/approval_public.key b/scenarios/gait/approved-registry-signature-mismatch-high-risk/approval_public.key new file mode 100644 index 0000000..9d1c2d1 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/approval_public.key @@ -0,0 +1 @@ +2452IFcJJprSJUSaASiKpWRN4bzs7KbckliBk2cKGeg= diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/approved_scripts_tampered.json b/scenarios/gait/approved-registry-signature-mismatch-high-risk/approved_scripts_tampered.json new file mode 100644 index 0000000..0aa7c91 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/approved_scripts_tampered.json @@ -0,0 +1,25 @@ +{ + "entries": [ + { + "schema_id": "gait.gate.approved_script_entry", + "schema_version": "1.0.0", + "created_at": "2026-02-20T02:31:28.283948Z", + "producer_version": "0.0.0-dev", + "pattern_id": "pattern_a0dce61c1eb7", + "policy_digest": 
"44ae19688443915c381be9f806b7fc9ddb2736d286c4e40464b7575a874e9420", + "script_hash": "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "tool_sequence": [ + "tool.read", + "tool.write" + ], + "approver_identity": "secops", + "expires_at": "2026-02-27T02:31:28.283948Z", + "signature": { + "alg": "ed25519", + "key_id": "8393424682c702d59a923d26924b2d4827711e6c585117077766645e22efeb52", + "sig": "uS11HrmDO+D1b/IaSosCRfEmolp5FTP2J6fWsMHg/Nvn1TAVaT24VUJdnJrfbcZTbUmW0WqQ4+FnoalnYMpVCg==", + "signed_digest": "1e19eb1f8f11a450139cbe01ad9081be99a455c51157ed5f9793e08dc613dce1" + } + } + ] +} diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/expected.yaml b/scenarios/gait/approved-registry-signature-mismatch-high-risk/expected.yaml new file mode 100644 index 0000000..c67bbe2 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/expected.yaml @@ -0,0 +1,3 @@ +exit_code: 3 +ok: false +error_contains: approved script registry verification failed at entry 0 diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/flags.yaml b/scenarios/gait/approved-registry-signature-mismatch-high-risk/flags.yaml new file mode 100644 index 0000000..2ddf4a6 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/flags.yaml @@ -0,0 +1,3 @@ +repeat: 1 +approved_script_registry: approved_scripts_tampered.json +approved_script_public_key: approval_public.key diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/intent.json b/scenarios/gait/approved-registry-signature-mismatch-high-risk/intent.json new file mode 100644 index 0000000..d9f1d3a --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/intent.json @@ -0,0 +1,46 @@ +{ + "schema_id": "gait.gate.intent_request", + "schema_version": "1.0.0", + "created_at": "2026-02-20T00:00:00Z", + "producer_version": "scenario-approved-registry-mismatch", + "tool_name": "script", + "args": {}, + 
"targets": [], + "context": { + "identity": "alice", + "workspace": "/repo/gait", + "risk_class": "high" + }, + "script": { + "steps": [ + { + "tool_name": "tool.read", + "args": { + "path": "/tmp/input.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/input.txt", + "operation": "read", + "endpoint_class": "fs.read" + } + ] + }, + { + "tool_name": "tool.write", + "args": { + "path": "/tmp/output.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/output.txt", + "operation": "write", + "endpoint_class": "fs.write" + } + ] + } + ] + } +} diff --git a/scenarios/gait/approved-registry-signature-mismatch-high-risk/policy.yaml b/scenarios/gait/approved-registry-signature-mismatch-high-risk/policy.yaml new file mode 100644 index 0000000..adcfff9 --- /dev/null +++ b/scenarios/gait/approved-registry-signature-mismatch-high-risk/policy.yaml @@ -0,0 +1,10 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: allow +rules: + - name: require_write_approval + priority: 10 + effect: require_approval + match: + tool_names: [tool.write] + reason_codes: [approval_required_write] diff --git a/scenarios/gait/script-max-steps-exceeded/README.md b/scenarios/gait/script-max-steps-exceeded/README.md new file mode 100644 index 0000000..285830f --- /dev/null +++ b/scenarios/gait/script-max-steps-exceeded/README.md @@ -0,0 +1,3 @@ +# script-max-steps-exceeded + +Validates script fail-closed block behavior when `scripts.max_steps` is exceeded. 
diff --git a/scenarios/gait/script-max-steps-exceeded/expected.yaml b/scenarios/gait/script-max-steps-exceeded/expected.yaml new file mode 100644 index 0000000..2d3e4e2 --- /dev/null +++ b/scenarios/gait/script-max-steps-exceeded/expected.yaml @@ -0,0 +1,9 @@ +exit_code: 3 +ok: true +verdict: block +script: true +step_count: 2 +step_verdict_count: 2 +composite_risk_class: medium +reason_codes_must_include: + - script_max_steps_exceeded diff --git a/scenarios/gait/script-max-steps-exceeded/flags.yaml b/scenarios/gait/script-max-steps-exceeded/flags.yaml new file mode 100644 index 0000000..7cf88e6 --- /dev/null +++ b/scenarios/gait/script-max-steps-exceeded/flags.yaml @@ -0,0 +1 @@ +repeat: 1 diff --git a/scenarios/gait/script-max-steps-exceeded/intent.json b/scenarios/gait/script-max-steps-exceeded/intent.json new file mode 100644 index 0000000..4bce038 --- /dev/null +++ b/scenarios/gait/script-max-steps-exceeded/intent.json @@ -0,0 +1,46 @@ +{ + "schema_id": "gait.gate.intent_request", + "schema_version": "1.0.0", + "created_at": "2026-02-20T00:00:00Z", + "producer_version": "scenario-script-threshold", + "tool_name": "script", + "args": {}, + "targets": [], + "context": { + "identity": "alice", + "workspace": "/repo/gait", + "risk_class": "medium" + }, + "script": { + "steps": [ + { + "tool_name": "tool.read", + "args": { + "path": "/tmp/input.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/input.txt", + "operation": "read", + "endpoint_class": "fs.read" + } + ] + }, + { + "tool_name": "tool.write", + "args": { + "path": "/tmp/output.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/output.txt", + "operation": "write", + "endpoint_class": "fs.write" + } + ] + } + ] + } +} diff --git a/scenarios/gait/script-max-steps-exceeded/policy.yaml b/scenarios/gait/script-max-steps-exceeded/policy.yaml new file mode 100644 index 0000000..289edd2 --- /dev/null +++ b/scenarios/gait/script-max-steps-exceeded/policy.yaml @@ -0,0 +1,5 @@ +schema_id: 
gait.gate.policy +schema_version: 1.0.0 +default_verdict: allow +scripts: + max_steps: 1 diff --git a/scenarios/gait/script-mixed-risk-block/README.md b/scenarios/gait/script-mixed-risk-block/README.md new file mode 100644 index 0000000..d29c344 --- /dev/null +++ b/scenarios/gait/script-mixed-risk-block/README.md @@ -0,0 +1,3 @@ +# script-mixed-risk-block + +Validates mixed-risk script blocking when `scripts.block_mixed_risk` is enabled. diff --git a/scenarios/gait/script-mixed-risk-block/expected.yaml b/scenarios/gait/script-mixed-risk-block/expected.yaml new file mode 100644 index 0000000..a0b2a9a --- /dev/null +++ b/scenarios/gait/script-mixed-risk-block/expected.yaml @@ -0,0 +1,9 @@ +exit_code: 3 +ok: true +verdict: block +script: true +step_count: 2 +step_verdict_count: 2 +composite_risk_class: medium +reason_codes_must_include: + - script_mixed_risk_blocked diff --git a/scenarios/gait/script-mixed-risk-block/flags.yaml b/scenarios/gait/script-mixed-risk-block/flags.yaml new file mode 100644 index 0000000..7cf88e6 --- /dev/null +++ b/scenarios/gait/script-mixed-risk-block/flags.yaml @@ -0,0 +1 @@ +repeat: 1 diff --git a/scenarios/gait/script-mixed-risk-block/intent.json b/scenarios/gait/script-mixed-risk-block/intent.json new file mode 100644 index 0000000..4bce038 --- /dev/null +++ b/scenarios/gait/script-mixed-risk-block/intent.json @@ -0,0 +1,46 @@ +{ + "schema_id": "gait.gate.intent_request", + "schema_version": "1.0.0", + "created_at": "2026-02-20T00:00:00Z", + "producer_version": "scenario-script-threshold", + "tool_name": "script", + "args": {}, + "targets": [], + "context": { + "identity": "alice", + "workspace": "/repo/gait", + "risk_class": "medium" + }, + "script": { + "steps": [ + { + "tool_name": "tool.read", + "args": { + "path": "/tmp/input.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/input.txt", + "operation": "read", + "endpoint_class": "fs.read" + } + ] + }, + { + "tool_name": "tool.write", + "args": { + "path": 
"/tmp/output.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/output.txt", + "operation": "write", + "endpoint_class": "fs.write" + } + ] + } + ] + } +} diff --git a/scenarios/gait/script-mixed-risk-block/policy.yaml b/scenarios/gait/script-mixed-risk-block/policy.yaml new file mode 100644 index 0000000..e11871f --- /dev/null +++ b/scenarios/gait/script-mixed-risk-block/policy.yaml @@ -0,0 +1,5 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: allow +scripts: + block_mixed_risk: true diff --git a/scenarios/gait/script-threshold-approval-determinism/README.md b/scenarios/gait/script-threshold-approval-determinism/README.md new file mode 100644 index 0000000..35ab74e --- /dev/null +++ b/scenarios/gait/script-threshold-approval-determinism/README.md @@ -0,0 +1,3 @@ +# script-threshold-approval-determinism + +Validates deterministic script evaluation output when `scripts.require_approval_above` triggers `require_approval`. diff --git a/scenarios/gait/script-threshold-approval-determinism/expected.yaml b/scenarios/gait/script-threshold-approval-determinism/expected.yaml new file mode 100644 index 0000000..f800270 --- /dev/null +++ b/scenarios/gait/script-threshold-approval-determinism/expected.yaml @@ -0,0 +1,9 @@ +exit_code: 4 +ok: true +verdict: require_approval +script: true +step_count: 2 +step_verdict_count: 2 +composite_risk_class: medium +reason_codes_must_include: + - script_step_threshold_approval diff --git a/scenarios/gait/script-threshold-approval-determinism/flags.yaml b/scenarios/gait/script-threshold-approval-determinism/flags.yaml new file mode 100644 index 0000000..3cbc000 --- /dev/null +++ b/scenarios/gait/script-threshold-approval-determinism/flags.yaml @@ -0,0 +1 @@ +repeat: 2 diff --git a/scenarios/gait/script-threshold-approval-determinism/intent.json b/scenarios/gait/script-threshold-approval-determinism/intent.json new file mode 100644 index 0000000..4bce038 --- /dev/null +++ 
b/scenarios/gait/script-threshold-approval-determinism/intent.json @@ -0,0 +1,46 @@ +{ + "schema_id": "gait.gate.intent_request", + "schema_version": "1.0.0", + "created_at": "2026-02-20T00:00:00Z", + "producer_version": "scenario-script-threshold", + "tool_name": "script", + "args": {}, + "targets": [], + "context": { + "identity": "alice", + "workspace": "/repo/gait", + "risk_class": "medium" + }, + "script": { + "steps": [ + { + "tool_name": "tool.read", + "args": { + "path": "/tmp/input.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/input.txt", + "operation": "read", + "endpoint_class": "fs.read" + } + ] + }, + { + "tool_name": "tool.write", + "args": { + "path": "/tmp/output.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/output.txt", + "operation": "write", + "endpoint_class": "fs.write" + } + ] + } + ] + } +} diff --git a/scenarios/gait/script-threshold-approval-determinism/policy.yaml b/scenarios/gait/script-threshold-approval-determinism/policy.yaml new file mode 100644 index 0000000..61c58f2 --- /dev/null +++ b/scenarios/gait/script-threshold-approval-determinism/policy.yaml @@ -0,0 +1,5 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: allow +scripts: + require_approval_above: 1 diff --git a/scenarios/gait/wrkr-missing-fail-closed-high-risk/README.md b/scenarios/gait/wrkr-missing-fail-closed-high-risk/README.md new file mode 100644 index 0000000..bc02799 --- /dev/null +++ b/scenarios/gait/wrkr-missing-fail-closed-high-risk/README.md @@ -0,0 +1,3 @@ +# wrkr-missing-fail-closed-high-risk + +Validates fail-closed behavior when Wrkr inventory is configured but unavailable for high-risk intent context. 
diff --git a/scenarios/gait/wrkr-missing-fail-closed-high-risk/expected.yaml b/scenarios/gait/wrkr-missing-fail-closed-high-risk/expected.yaml new file mode 100644 index 0000000..44fb71d --- /dev/null +++ b/scenarios/gait/wrkr-missing-fail-closed-high-risk/expected.yaml @@ -0,0 +1,3 @@ +exit_code: 3 +ok: false +error_contains: wrkr inventory unavailable in fail-closed mode diff --git a/scenarios/gait/wrkr-missing-fail-closed-high-risk/flags.yaml b/scenarios/gait/wrkr-missing-fail-closed-high-risk/flags.yaml new file mode 100644 index 0000000..85f8726 --- /dev/null +++ b/scenarios/gait/wrkr-missing-fail-closed-high-risk/flags.yaml @@ -0,0 +1,2 @@ +repeat: 1 +wrkr_inventory: missing_wrkr_inventory.json diff --git a/scenarios/gait/wrkr-missing-fail-closed-high-risk/intent.json b/scenarios/gait/wrkr-missing-fail-closed-high-risk/intent.json new file mode 100644 index 0000000..28dd5f1 --- /dev/null +++ b/scenarios/gait/wrkr-missing-fail-closed-high-risk/intent.json @@ -0,0 +1,32 @@ +{ + "schema_id": "gait.gate.intent_request", + "schema_version": "1.0.0", + "created_at": "2026-02-20T00:00:00Z", + "producer_version": "scenario-wrkr-missing", + "tool_name": "script", + "args": {}, + "targets": [], + "context": { + "identity": "alice", + "workspace": "/repo/gait", + "risk_class": "high" + }, + "script": { + "steps": [ + { + "tool_name": "tool.read", + "args": { + "path": "/tmp/input.txt" + }, + "targets": [ + { + "kind": "path", + "value": "/tmp/input.txt", + "operation": "read", + "endpoint_class": "fs.read" + } + ] + } + ] + } +} diff --git a/scenarios/gait/wrkr-missing-fail-closed-high-risk/policy.yaml b/scenarios/gait/wrkr-missing-fail-closed-high-risk/policy.yaml new file mode 100644 index 0000000..258d36f --- /dev/null +++ b/scenarios/gait/wrkr-missing-fail-closed-high-risk/policy.yaml @@ -0,0 +1,3 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: allow diff --git a/scripts/run_scenarios.sh b/scripts/run_scenarios.sh index 103c586..afb6712 
100755 --- a/scripts/run_scenarios.sh +++ b/scripts/run_scenarios.sh @@ -40,7 +40,7 @@ status=${PIPESTATUS[0]} set -e pass_count=$(grep -c -- '--- PASS: TestTier11Scenarios/' "$OUT_FILE" || true) -expected_count=8 +expected_count=13 if [[ "$status" -ne 0 ]]; then echo "scenario test execution failed for $PRODUCT" >&2 exit "$status" diff --git a/scripts/validate_scenarios.sh b/scripts/validate_scenarios.sh index e215436..f09e783 100755 --- a/scripts/validate_scenarios.sh +++ b/scripts/validate_scenarios.sh @@ -8,12 +8,17 @@ SCENARIO_ROOT="$REPO_ROOT/scenarios/gait" required_scenarios=( "approval-expiry-1s-past" "approval-token-valid" + "approved-registry-signature-mismatch-high-risk" "concurrent-evaluation-10" "delegation-chain-depth-3" "dry-run-no-side-effects" "pack-integrity-round-trip" "policy-allow-safe-tools" "policy-block-destructive" + "script-max-steps-exceeded" + "script-mixed-risk-block" + "script-threshold-approval-determinism" + "wrkr-missing-fail-closed-high-risk" ) if [[ ! -d "$SCENARIO_ROOT" ]]; then