diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 1fc1b64c0..e1fdb1964 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -922,6 +922,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) + // Use redacted blob creation for metadata files (transcripts, prompts, etc.) + // to ensure PII and secrets are redacted before writing to git. blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) diff --git a/cmd/entire/cli/doctor.go b/cmd/entire/cli/doctor.go index 1b5a0a940..a19b06155 100644 --- a/cmd/entire/cli/doctor.go +++ b/cmd/entire/cli/doctor.go @@ -38,6 +38,9 @@ For each stuck session, you can choose to: Use --force to condense all fixable sessions without prompting. Sessions that can't be condensed will be discarded.`, + PreRun: func(_ *cobra.Command, _ []string) { + strategy.EnsureRedactionConfigured() + }, RunE: func(cmd *cobra.Command, _ []string) error { return runSessionsFix(cmd, forceFlag) }, diff --git a/cmd/entire/cli/hooks_git_cmd.go b/cmd/entire/cli/hooks_git_cmd.go index 2655478cd..b3e7bd15a 100644 --- a/cmd/entire/cli/hooks_git_cmd.go +++ b/cmd/entire/cli/hooks_git_cmd.go @@ -81,6 +81,10 @@ func initHookLogging() func() { // Init failed - logging will use stderr fallback return func() {} } + + // Configure PII redaction once at startup (reads settings, no-op if disabled). + strategy.EnsureRedactionConfigured() + return logging.Close } diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go index 1249eb01c..62b728f22 100644 --- a/cmd/entire/cli/settings/settings.go +++ b/cmd/entire/cli/settings/settings.go @@ -49,6 +49,24 @@ type EntireSettings struct { // Telemetry controls anonymous usage analytics. // nil = not asked yet (show prompt), true = opted in, false = opted out Telemetry *bool `json:"telemetry,omitempty"` + + // Redaction configures PII redaction behavior for transcripts and metadata. + Redaction *RedactionSettings `json:"redaction,omitempty"` +} + +// RedactionSettings configures redaction behavior beyond the default secret detection. +type RedactionSettings struct { + PII *PIISettings `json:"pii,omitempty"` +} + +// PIISettings configures PII detection categories. +// When Enabled is true, email and phone default to true; address defaults to false. +type PIISettings struct { + Enabled bool `json:"enabled"` + Email *bool `json:"email,omitempty"` + Phone *bool `json:"phone,omitempty"` + Address *bool `json:"address,omitempty"` + CustomPatterns map[string]string `json:"custom_patterns,omitempty"` } // Load loads the Entire settings from .entire/settings.json, @@ -204,6 +222,84 @@ func mergeJSON(settings *EntireSettings, data []byte) error { settings.Telemetry = &t } + // Merge redaction sub-fields if present (field-level, not wholesale replace). + if redactionRaw, ok := raw["redaction"]; ok { + if settings.Redaction == nil { + settings.Redaction = &RedactionSettings{} + } + if err := mergeRedaction(settings.Redaction, redactionRaw); err != nil { + return fmt.Errorf("parsing redaction field: %w", err) + } + } + + return nil +} + +// mergeRedaction merges redaction overrides into existing RedactionSettings. +// Only fields present in the override JSON are applied. +func mergeRedaction(dst *RedactionSettings, data json.RawMessage) error { + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + return fmt.Errorf("parsing redaction: %w", err) + } + if piiRaw, ok := raw["pii"]; ok { + if dst.PII == nil { + dst.PII = &PIISettings{} + } + if err := mergePIISettings(dst.PII, piiRaw); err != nil { + return err + } + } + return nil +} + +// mergePIISettings merges PII overrides into existing PIISettings. +// Only fields present in the override JSON are applied; missing fields +// are preserved from the base settings. +func mergePIISettings(dst *PIISettings, data json.RawMessage) error { + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + return fmt.Errorf("parsing pii: %w", err) + } + if v, ok := raw["enabled"]; ok { + if err := json.Unmarshal(v, &dst.Enabled); err != nil { + return fmt.Errorf("parsing pii.enabled: %w", err) + } + } + if v, ok := raw["email"]; ok { + var b bool + if err := json.Unmarshal(v, &b); err != nil { + return fmt.Errorf("parsing pii.email: %w", err) + } + dst.Email = &b + } + if v, ok := raw["phone"]; ok { + var b bool + if err := json.Unmarshal(v, &b); err != nil { + return fmt.Errorf("parsing pii.phone: %w", err) + } + dst.Phone = &b + } + if v, ok := raw["address"]; ok { + var b bool + if err := json.Unmarshal(v, &b); err != nil { + return fmt.Errorf("parsing pii.address: %w", err) + } + dst.Address = &b + } + if v, ok := raw["custom_patterns"]; ok { + var cp map[string]string + if err := json.Unmarshal(v, &cp); err != nil { + return fmt.Errorf("parsing pii.custom_patterns: %w", err) + } + if dst.CustomPatterns == nil { + dst.CustomPatterns = cp + } else { + for k, val := range cp { + dst.CustomPatterns[k] = val + } + } + } return nil } diff --git a/cmd/entire/cli/settings/settings_test.go b/cmd/entire/cli/settings/settings_test.go index ad09bc57a..6b5f2eb7f 100644 --- a/cmd/entire/cli/settings/settings_test.go +++ b/cmd/entire/cli/settings/settings_test.go @@ -59,7 +59,8 @@ func TestLoad_AcceptsValidKeys(t *testing.T) { "local_dev": false, "log_level": "debug", "strategy_options": {"key": "value"}, - "telemetry": true + "telemetry": true, + "redaction": {"pii": {"enabled": true, "email": true, "phone": false}} }` if err := os.WriteFile(settingsFile, []byte(settingsContent), 0644); err != nil { t.Fatalf("failed to write settings file: %v", err) @@ -92,6 +93,21 @@ func TestLoad_AcceptsValidKeys(t *testing.T) { if settings.Telemetry == nil || !*settings.Telemetry { t.Error("expected telemetry to be true") } + if settings.Redaction == nil { + t.Fatal("expected redaction to be non-nil") + } + if settings.Redaction.PII == nil { + t.Fatal("expected redaction.pii to be non-nil") + } + if !settings.Redaction.PII.Enabled { + t.Error("expected redaction.pii.enabled to be true") + } + if settings.Redaction.PII.Email == nil || !*settings.Redaction.PII.Email { + t.Error("expected redaction.pii.email to be true") + } + if settings.Redaction.PII.Phone == nil || *settings.Redaction.PII.Phone { + t.Error("expected redaction.pii.phone to be false") + } } func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) { @@ -135,6 +151,121 @@ func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) { } } +func TestLoad_MissingRedactionIsNil(t *testing.T) { + tmpDir := t.TempDir() + entireDir := filepath.Join(tmpDir, ".entire") + if err := os.MkdirAll(entireDir, 0755); err != nil { + t.Fatalf("failed to create .entire directory: %v", err) + } + + settingsFile := filepath.Join(entireDir, "settings.json") + if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit"}`), 0644); err != nil { + t.Fatalf("failed to write settings file: %v", err) + } + if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil { + t.Fatalf("failed to create .git directory: %v", err) + } + t.Chdir(tmpDir) + + settings, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if settings.Redaction != nil { + t.Error("expected redaction to be nil when not in settings") + } +} + +func TestLoad_LocalOverridesRedaction(t *testing.T) { + tmpDir := t.TempDir() + entireDir := filepath.Join(tmpDir, ".entire") + if err := os.MkdirAll(entireDir, 0755); err != nil { + t.Fatalf("failed to create .entire directory: %v", err) + } + + // Base settings: PII disabled + settingsFile := filepath.Join(entireDir, "settings.json") + if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit", "redaction": {"pii": {"enabled": false}}}`), 0644); err != nil { + t.Fatalf("failed to write settings file: %v", err) + } + + // Local override: PII enabled with custom patterns + localFile := filepath.Join(entireDir, "settings.local.json") + localContent := `{"redaction": {"pii": {"enabled": true, "custom_patterns": {"employee_id": "EMP-\\d{6}"}}}}` + if err := os.WriteFile(localFile, []byte(localContent), 0644); err != nil { + t.Fatalf("failed to write local settings file: %v", err) + } + + if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil { + t.Fatalf("failed to create .git directory: %v", err) + } + t.Chdir(tmpDir) + + settings, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if settings.Redaction == nil || settings.Redaction.PII == nil { + t.Fatal("expected redaction.pii to be non-nil after local override") + } + if !settings.Redaction.PII.Enabled { + t.Error("expected local override to enable PII") + } + if settings.Redaction.PII.CustomPatterns == nil { + t.Fatal("expected custom_patterns to be non-nil") + } + if settings.Redaction.PII.CustomPatterns["employee_id"] != `EMP-\d{6}` { + t.Errorf("expected employee_id pattern, got %v", settings.Redaction.PII.CustomPatterns) + } +} + +func TestLoad_LocalMergesRedactionSubfields(t *testing.T) { + tmpDir := t.TempDir() + entireDir := filepath.Join(tmpDir, ".entire") + if err := os.MkdirAll(entireDir, 0755); err != nil { + t.Fatalf("failed to create .entire directory: %v", err) + } + + // Base: PII enabled with email=true, phone=true + baseContent := `{"strategy":"manual-commit","redaction":{"pii":{"enabled":true,"email":true,"phone":true}}}` + if err := os.WriteFile(filepath.Join(entireDir, "settings.json"), []byte(baseContent), 0644); err != nil { + t.Fatalf("failed to write settings file: %v", err) + } + + // Local: adds custom_patterns only — should NOT erase email/phone from base + localContent := `{"redaction":{"pii":{"enabled":true,"custom_patterns":{"ssn":"\\d{3}-\\d{2}-\\d{4}"}}}}` + if err := os.WriteFile(filepath.Join(entireDir, "settings.local.json"), []byte(localContent), 0644); err != nil { + t.Fatalf("failed to write local settings file: %v", err) + } + + if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil { + t.Fatalf("failed to create .git directory: %v", err) + } + t.Chdir(tmpDir) + + settings, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if settings.Redaction == nil || settings.Redaction.PII == nil { + t.Fatal("expected redaction.pii to be non-nil") + } + // email and phone from base should survive local merge + if settings.Redaction.PII.Email == nil || !*settings.Redaction.PII.Email { + t.Error("expected email=true from base to survive local merge") + } + if settings.Redaction.PII.Phone == nil || !*settings.Redaction.PII.Phone { + t.Error("expected phone=true from base to survive local merge") + } + // custom_patterns from local should be present + if settings.Redaction.PII.CustomPatterns == nil { + t.Fatal("expected custom_patterns from local to be present") + } + if _, ok := settings.Redaction.PII.CustomPatterns["ssn"]; !ok { + t.Error("expected ssn pattern from local override") + } +} + // containsUnknownField checks if the error message indicates an unknown field func containsUnknownField(msg string) bool { // Go's json package reports unknown fields with this message format diff --git a/cmd/entire/cli/strategy/common.go b/cmd/entire/cli/strategy/common.go index ecc5b5775..a63318c27 100644 --- a/cmd/entire/cli/strategy/common.go +++ b/cmd/entire/cli/strategy/common.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "os" "os/exec" "path/filepath" @@ -16,8 +17,11 @@ import ( "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/entireio/cli/cmd/entire/cli/trailers" + "github.com/entireio/cli/redact" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -238,6 +242,37 @@ var ( protectedDirsCache []string ) +var initRedactionOnce sync.Once + +// EnsureRedactionConfigured loads PII redaction settings and configures the +// redact package. No-op if PII is not enabled in settings. +// Must be called at each process entry point before checkpoint writes +// (e.g., hook PersistentPreRunE, doctor PreRun). +func EnsureRedactionConfigured() { + initRedactionOnce.Do(func() { + s, err := settings.Load() + if err != nil { + logCtx := logging.WithComponent(context.Background(), "redaction") + logging.Warn(logCtx, "failed to load settings for PII redaction", slog.String("error", err.Error())) + return + } + if s.Redaction == nil || s.Redaction.PII == nil || !s.Redaction.PII.Enabled { + return + } + pii := s.Redaction.PII + cfg := redact.PIIConfig{ + Enabled: true, + Categories: make(map[redact.PIICategory]bool), + CustomPatterns: pii.CustomPatterns, + } + // Email and phone default to true when PII is enabled; address defaults to false. + cfg.Categories[redact.PIIEmail] = pii.Email == nil || *pii.Email + cfg.Categories[redact.PIIPhone] = pii.Phone == nil || *pii.Phone + cfg.Categories[redact.PIIAddress] = pii.Address != nil && *pii.Address + redact.ConfigurePII(cfg) + }) +} + // isSpecificAgentType returns true if the agent type is a known, specific value // (not empty and not the generic "Agent" fallback). func isSpecificAgentType(t agent.AgentType) bool { diff --git a/redact/pii.go b/redact/pii.go new file mode 100644 index 000000000..e48b3db45 --- /dev/null +++ b/redact/pii.go @@ -0,0 +1,201 @@ +package redact + +import ( + "log/slog" + "regexp" + "strings" + "sync" +) + +// PIICategory identifies a category of personally identifiable information. +type PIICategory string + +const ( + PIIEmail PIICategory = "email" + PIIPhone PIICategory = "phone" + PIIAddress PIICategory = "address" +) + +// Label constants used in replacement tokens and pattern matching. +const ( + labelEmail = "EMAIL" + labelPhone = "PHONE" + labelAddress = "ADDRESS" +) + +// PIIConfig controls which PII categories are detected and redacted. +type PIIConfig struct { + // Enabled globally enables/disables PII redaction. + // When false, no PII patterns are checked (secrets still redacted). + Enabled bool + + // Categories maps each PII category to whether it is enabled. + // Missing keys default to false (disabled). + Categories map[PIICategory]bool + + // CustomPatterns allows teams to define additional regex patterns. + // Each key is a label used in the replacement token (uppercased), + // and each value is a regex pattern string. + // Example: {"employee_id": `EMP-\d{6}`} produces [REDACTED_EMPLOYEE_ID]. + CustomPatterns map[string]string + + // patterns holds pre-compiled patterns, populated by ConfigurePII. + // When nil (e.g., in tests constructing PIIConfig directly), + // detectPII falls back to compilePIIPatterns. + patterns []piiPattern +} + +// piiPattern is a compiled regex with its replacement token label. +type piiPattern struct { + regex *regexp.Regexp + label string // e.g., "EMAIL", "PHONE", "ADDRESS" +} + +var ( + piiConfig *PIIConfig + piiConfigMu sync.RWMutex +) + +// ConfigurePII sets the global PII redaction configuration. +// Pre-compiles patterns so the hot path (String → detectPII) does no compilation. +// Call once at startup after loading settings. Thread-safe. +func ConfigurePII(cfg PIIConfig) { + piiConfigMu.Lock() + defer piiConfigMu.Unlock() + cfgCopy := cfg + cfgCopy.patterns = compilePIIPatterns(&cfgCopy) + piiConfig = &cfgCopy +} + +// getPIIConfig returns the current PII configuration, or nil if not configured. +func getPIIConfig() *PIIConfig { + piiConfigMu.RLock() + defer piiConfigMu.RUnlock() + return piiConfig +} + +// Pre-compiled builtin PII regexes. +var ( + emailRegex = regexp.MustCompile(`\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b`) + // phoneRegex uses three branches to avoid false-positives on dotted-decimal + // strings like version numbers (1.234.567.8901) and IPs (192.168.001.0001). + // Dots are only allowed as separators when preceded by +1 (unambiguous intl prefix). + // Without +1, only dashes and spaces are accepted as separators. + phoneRegex = regexp.MustCompile( + `(?:` + + `\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}` + // +1 intl prefix: any separator + `|` + + `(?:1[-\s])?\(\d{3}\)\s?\d{3}[-.\s]?\d{4}` + // parenthesized area code + `|` + + `(?:1[-\s])?\d{3}[-\s]\d{3}[-\s]\d{4}` + // bare digits: dash/space only + `)`, + ) + addressRegex = regexp.MustCompile(`\d{1,5}\s+[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*\s+(?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Rd|Road|Ct|Court|Pl(?:ace)?|Way|Cir(?:cle)?|Ter(?:race)?|Pkwy|Parkway)\.?`) +) + +// emailAllowPatterns are email patterns that should NOT be treated as PII. +// These appear frequently in coding transcripts (git authors, bot accounts) +// and are public metadata rather than private information. +// Entries starting with "@" match the email suffix; entries ending with "@" +// match the email prefix. All comparisons are case-insensitive. +var emailAllowPatterns = []string{ + "noreply@", // Generic noreply addresses + "actions@", // GitHub Actions bot + "@users.noreply.github.com", // GitHub user noreply + "@noreply.github.com", // GitHub noreply +} + +// isAllowlistedEmail returns true if the email matches a known non-sensitive pattern. +func isAllowlistedEmail(email string) bool { + lower := strings.ToLower(email) + for _, pattern := range emailAllowPatterns { + lp := strings.ToLower(pattern) + switch { + case strings.HasPrefix(pattern, "@"): + if strings.HasSuffix(lower, lp) { + return true + } + case strings.HasSuffix(pattern, "@"): + if strings.HasPrefix(lower, lp) { + return true + } + default: + if lower == lp { + return true + } + } + } + return false +} + +// builtinPIIPattern associates a compiled regex with a category and label. +type builtinPIIPattern struct { + category PIICategory + label string + regex *regexp.Regexp +} + +// builtinPIIPatterns is the set of default PII detection patterns. +var builtinPIIPatterns = []builtinPIIPattern{ + {PIIEmail, labelEmail, emailRegex}, + {PIIPhone, labelPhone, phoneRegex}, + {PIIAddress, labelAddress, addressRegex}, +} + +// detectPII returns tagged regions for PII matches in s. +// Returns nil immediately if PII redaction is not configured or not enabled. +func detectPII(cfg *PIIConfig, s string) []taggedRegion { + if cfg == nil || !cfg.Enabled { + return nil + } + + patterns := cfg.patterns + if patterns == nil { + patterns = compilePIIPatterns(cfg) + } + var regions []taggedRegion + for _, p := range patterns { + for _, loc := range p.regex.FindAllStringIndex(s, -1) { + // Skip allowlisted email addresses (noreply, bot accounts, etc.). + if p.label == labelEmail && isAllowlistedEmail(s[loc[0]:loc[1]]) { + continue + } + regions = append(regions, taggedRegion{ + region: region{loc[0], loc[1]}, + label: p.label, + }) + } + } + return regions +} + +// compilePIIPatterns builds the pattern list from config. +// Builtin regexes are pre-compiled package vars; only custom patterns +// need compilation here. +func compilePIIPatterns(cfg *PIIConfig) []piiPattern { + var patterns []piiPattern + for _, bp := range builtinPIIPatterns { + if enabled, ok := cfg.Categories[bp.category]; ok && enabled { + patterns = append(patterns, piiPattern{regex: bp.regex, label: bp.label}) + } + } + for label, pattern := range cfg.CustomPatterns { + compiled, err := regexp.Compile(pattern) + if err != nil { + slog.Warn("skipping invalid custom PII pattern", slog.String("label", label), slog.String("error", err.Error())) + continue + } + patterns = append(patterns, piiPattern{regex: compiled, label: strings.ToUpper(label)}) + } + return patterns +} + +// replacementToken returns the redaction placeholder for a given label. +// Empty label (secrets) returns "REDACTED" for backward compatibility. +// Non-empty label (PII) returns "[REDACTED_