diff --git a/cmd/entire/cli/benchutil/benchutil.go b/cmd/entire/cli/benchutil/benchutil.go
index a382fa84c..d6d2c477a 100644
--- a/cmd/entire/cli/benchutil/benchutil.go
+++ b/cmd/entire/cli/benchutil/benchutil.go
@@ -537,6 +537,32 @@ func generateTranscriptMessage(index int, opts TranscriptOpts) map[string]any {
 	return msg
 }
 
+// SeedBranches creates count branch refs, each pointing at br.HeadHash, to
+// simulate a repo with many refs (which affects go-git ref scanning). Names
+// are prefix plus a zero-padded index: "feature/bench-" → "feature/bench-000".
+func (br *BenchRepo) SeedBranches(b *testing.B, prefix string, count int) {
+	b.Helper()
+	target := plumbing.NewHash(br.HeadHash)
+	for idx := 0; idx < count; idx++ {
+		branch := fmt.Sprintf("%s%03d", prefix, idx)
+		refName := plumbing.NewBranchReferenceName(branch)
+		if err := br.Repo.Storer.SetReference(plumbing.NewHashReference(refName, target)); err != nil {
+			b.Fatalf("create branch %s: %v", branch, err)
+		}
+	}
+}
+
+// PackRefs runs `git pack-refs --all` to simulate a real repo where most refs
+// are in the packed-refs file. Large repos almost always have packed refs.
+func (br *BenchRepo) PackRefs(b *testing.B) {
+	b.Helper()
+	packRefs := exec.CommandContext(context.Background(), "git", "pack-refs", "--all")
+	packRefs.Dir = br.Dir
+	if combined, err := packRefs.CombinedOutput(); err != nil {
+		b.Fatalf("git pack-refs: %v\n%s", err, combined)
+	}
+}
+
 func generatePadding(prefix string, targetBytes int) string {
 	if len(prefix) >= targetBytes {
 		return prefix[:targetBytes]
diff --git a/cmd/entire/cli/integration_test/hook_bench_test.go b/cmd/entire/cli/integration_test/hook_bench_test.go
new file mode 100644
index 000000000..baacea878
--- /dev/null
+++ b/cmd/entire/cli/integration_test/hook_bench_test.go
@@ -0,0 +1,166 @@
+//go:build integration
+
+package integration
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"testing"
+	"time"
+
+	"github.com/entireio/cli/cmd/entire/cli/benchutil"
+)
+
+// BenchmarkHookSessionStart times the "entire hooks claude-code session-start"
+// subprocess end to end, i.e. the latency Claude Code users experience on
+// every startup.
+//
+// Every sub-benchmark varies exactly one scaling dimension and keeps the
+// remaining ones at a small baseline.
+//
+// Run all:
+//
+//	go test -tags=integration -bench=BenchmarkHookSessionStart -benchtime=5x -run='^$' -timeout=10m ./cmd/entire/cli/integration_test/...
+//
+// Run one dimension:
+//
+//	go test -tags=integration -bench=BenchmarkHookSessionStart/Sessions -benchtime=5x -run='^$' ./cmd/entire/cli/integration_test/...
+func BenchmarkHookSessionStart(b *testing.B) {
+	dimensions := []struct {
+		name string
+		run  func(*testing.B)
+	}{
+		{"Sessions", benchSessionCount},
+		{"Refs", benchRefCount},
+		{"RepoFiles", benchRepoFiles},
+		{"Commits", benchCommitHistory},
+	}
+	for _, dim := range dimensions {
+		b.Run(dim.name, dim.run)
+	}
+}
+
+// benchSessionCount scales the number of session state files in .git/entire-sessions/.
+// Baseline: 10 files, 1 commit, ~2 refs.
+func benchSessionCount(b *testing.B) {
+	repoOpts := benchutil.RepoOpts{FileCount: 10, FeatureBranch: "feature/bench"}
+	for _, sessions := range []int{0, 1, 5, 20, 50, 100} {
+		b.Run(fmt.Sprintf("%d", sessions), func(sb *testing.B) {
+			bench := benchutil.NewBenchRepo(sb, repoOpts)
+			// Create the number of session states under test.
+			for i := 0; i < sessions; i++ {
+				bench.CreateSessionState(sb, benchutil.SessionOpts{
+					StepCount:    3,
+					FilesTouched: []string{"src/file_000.go", "src/file_001.go"},
+				})
+			}
+			// Hand the prepared repo to the shared timing loop.
+			runSessionStartHook(sb, bench)
+		})
+	}
+}
+
+// benchRefCount varies how many extra branches are seeded (and then packed).
+// Held constant: 5 session files, 10 files, 1 commit.
+func benchRefCount(b *testing.B) {
+	repoOpts := benchutil.RepoOpts{FileCount: 10, FeatureBranch: "feature/bench"}
+	for _, branches := range []int{0, 10, 50, 200, 500} {
+		b.Run(fmt.Sprintf("%d", branches), func(sb *testing.B) {
+			bench := benchutil.NewBenchRepo(sb, repoOpts)
+			// Fixed baseline of five session states.
+			for i := 0; i < 5; i++ {
+				bench.CreateSessionState(sb, benchutil.SessionOpts{
+					StepCount:    3,
+					FilesTouched: []string{"src/file_000.go"},
+				})
+			}
+			// Seeding and packing are skipped entirely for the zero case.
+			if branches > 0 {
+				bench.SeedBranches(sb, "feature/team-", branches)
+				bench.PackRefs(sb)
+			}
+			runSessionStartHook(sb, bench)
+		})
+	}
+}
+
+// benchRepoFiles varies the repo's FileCount, with FileSizeLines fixed at 50.
+// Held constant: 5 session files, 1 commit, ~2 refs.
+func benchRepoFiles(b *testing.B) {
+	for _, files := range []int{10, 100, 500, 1000} {
+		b.Run(fmt.Sprintf("%d", files), func(sb *testing.B) {
+			bench := benchutil.NewBenchRepo(sb, benchutil.RepoOpts{
+				FileCount:     files,
+				FileSizeLines: 50,
+				FeatureBranch: "feature/bench",
+			})
+			for i := 0; i < 5; i++ {
+				bench.CreateSessionState(sb, benchutil.SessionOpts{
+					StepCount:    3,
+					FilesTouched: []string{"src/file_000.go"},
+				})
+			}
+			runSessionStartHook(sb, bench)
+		})
+	}
+}
+
+// benchCommitHistory scales the number of commits in the repository.
+// Baseline: 5 session files, 10 files, ~2 refs.
+func benchCommitHistory(b *testing.B) {
+	for _, n := range []int{1, 10, 50, 200} {
+		b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
+			repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{
+				FileCount:     10,
+				CommitCount:   n,
+				FeatureBranch: "feature/bench",
+			})
+			for range 5 {
+				repo.CreateSessionState(b, benchutil.SessionOpts{
+					StepCount:    3,
+					FilesTouched: []string{"src/file_000.go"},
+				})
+			}
+			runSessionStartHook(b, repo)
+		})
+	}
+}
+
+// runSessionStartHook is the shared benchmark loop. It invokes the
+// session-start hook binary as a subprocess in repo.Dir b.N times, fails the
+// benchmark if any run returns an error, and reports the mean wall-clock
+// latency per invocation as the custom metric ms/op.
+func runSessionStartHook(b *testing.B, repo *benchutil.BenchRepo) {
+	b.Helper()
+	stdinPayload, err := json.Marshal(map[string]string{
+		"session_id":      "bench-session",
+		"transcript_path": "",
+	})
+	if err != nil {
+		b.Fatalf("marshal stdin: %v", err)
+	}
+
+	binary := getTestBinary()
+	env := append(os.Environ(), "ENTIRE_TEST_CLAUDE_PROJECT_DIR="+b.TempDir())
+
+	var total time.Duration
+	b.ResetTimer()
+	for range b.N {
+		start := time.Now()
+		cmd := exec.Command(binary, "hooks", "claude-code", "session-start")
+		cmd.Dir = repo.Dir
+		cmd.Stdin = bytes.NewReader(stdinPayload)
+		cmd.Env = env
+
+		output, err := cmd.CombinedOutput()
+		if err != nil {
+			b.Fatalf("session-start hook failed: %v\nOutput: %s", err, output)
+		}
+		total += time.Since(start)
+	}
+	// ReportMetric keeps only the last value per unit, so report the mean once.
+	b.ReportMetric(float64(total)/float64(time.Millisecond)/float64(b.N), "ms/op")
+}