Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cmd/entire/cli/benchutil/benchutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,32 @@ func generateTranscriptMessage(index int, opts TranscriptOpts) map[string]any {
return msg
}

// SeedBranches creates count branches that all point at the current HEAD.
// Branch names are built from prefix plus a zero-padded index (e.g.,
// "feature/bench-" → "feature/bench-000"). This simulates a repo with many
// refs, which affects go-git ref scanning performance.
func (br *BenchRepo) SeedBranches(b *testing.B, prefix string, count int) {
	b.Helper()
	head := plumbing.NewHash(br.HeadHash)
	for idx := 0; idx < count; idx++ {
		branch := fmt.Sprintf("%s%03d", prefix, idx)
		ref := plumbing.NewHashReference(plumbing.NewBranchReferenceName(branch), head)
		if err := br.Repo.Storer.SetReference(ref); err != nil {
			b.Fatalf("create branch %s: %v", branch, err)
		}
	}
}

// PackRefs runs `git pack-refs --all` to simulate a real repo where most refs
// live in the packed-refs file. Large repos almost always have packed refs.
func (br *BenchRepo) PackRefs(b *testing.B) {
	b.Helper()
	packCmd := exec.CommandContext(context.Background(), "git", "pack-refs", "--all")
	packCmd.Dir = br.Dir
	out, err := packCmd.CombinedOutput()
	if err != nil {
		b.Fatalf("git pack-refs: %v\n%s", err, out)
	}
}

func generatePadding(prefix string, targetBytes int) string {
if len(prefix) >= targetBytes {
return prefix[:targetBytes]
Expand Down
158 changes: 158 additions & 0 deletions cmd/entire/cli/integration_test/hook_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//go:build integration

package integration

import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"testing"
"time"

"github.com/entireio/cli/cmd/entire/cli/benchutil"
)

// BenchmarkHookSessionStart measures the end-to-end latency of the
// "entire hooks claude-code session-start" subprocess, which is what
// Claude Code users experience on every startup.
//
// Each sub-benchmark isolates one scaling dimension while holding every
// other dimension at a small baseline.
//
// Run all:
//
//	go test -tags=integration -bench=BenchmarkHookSessionStart -benchtime=5x -run='^$' -timeout=10m ./cmd/entire/cli/integration_test/...
//
// Run one dimension:
//
//	go test -tags=integration -bench=BenchmarkHookSessionStart/Sessions -benchtime=5x -run='^$' ./cmd/entire/cli/integration_test/...
func BenchmarkHookSessionStart(b *testing.B) {
	dimensions := []struct {
		name string
		run  func(*testing.B)
	}{
		{"Sessions", benchSessionCount},
		{"Refs", benchRefCount},
		{"RepoFiles", benchRepoFiles},
		{"Commits", benchCommitHistory},
	}
	for _, dim := range dimensions {
		b.Run(dim.name, dim.run)
	}
}

// benchSessionCount scales the number of session state files in .git/entire-sessions/.
// Baseline: 10 files, 1 commit, ~2 refs.
func benchSessionCount(b *testing.B) {
	for _, sessions := range []int{0, 1, 5, 20, 50, 100} {
		b.Run(fmt.Sprintf("%d", sessions), func(b *testing.B) {
			repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{
				FileCount:     10,
				FeatureBranch: "feature/bench",
			})
			for created := 0; created < sessions; created++ {
				repo.CreateSessionState(b, benchutil.SessionOpts{
					StepCount:    3,
					FilesTouched: []string{"src/file_000.go", "src/file_001.go"},
				})
			}
			runSessionStartHook(b, repo)
		})
	}
}

// benchRefCount scales the number of git branches (refs).
// Baseline: 5 session files, 10 files, 1 commit.
func benchRefCount(b *testing.B) {
	for _, refs := range []int{0, 10, 50, 200, 500} {
		b.Run(fmt.Sprintf("%d", refs), func(b *testing.B) {
			repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{
				FileCount:     10,
				FeatureBranch: "feature/bench",
			})
			for created := 0; created < 5; created++ {
				repo.CreateSessionState(b, benchutil.SessionOpts{
					StepCount:    3,
					FilesTouched: []string{"src/file_000.go"},
				})
			}
			// Only seed and pack when there is something to seed; the
			// baseline case keeps the repo's natural ~2 refs.
			if refs > 0 {
				repo.SeedBranches(b, "feature/team-", refs)
				repo.PackRefs(b)
			}
			runSessionStartHook(b, repo)
		})
	}
}

// benchRepoFiles scales the number of tracked files in the repository.
// Baseline: 5 session files, 1 commit, ~2 refs.
func benchRepoFiles(b *testing.B) {
	for _, files := range []int{10, 100, 500, 1000} {
		b.Run(fmt.Sprintf("%d", files), func(b *testing.B) {
			repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{
				FileCount:     files,
				FileSizeLines: 50,
				FeatureBranch: "feature/bench",
			})
			for created := 0; created < 5; created++ {
				repo.CreateSessionState(b, benchutil.SessionOpts{
					StepCount:    3,
					FilesTouched: []string{"src/file_000.go"},
				})
			}
			runSessionStartHook(b, repo)
		})
	}
}

// benchCommitHistory scales the number of commits in the repository.
// Baseline: 5 session files, 10 files, ~2 refs.
func benchCommitHistory(b *testing.B) {
	for _, commits := range []int{1, 10, 50, 200} {
		b.Run(fmt.Sprintf("%d", commits), func(b *testing.B) {
			repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{
				FileCount:     10,
				CommitCount:   commits,
				FeatureBranch: "feature/bench",
			})
			for created := 0; created < 5; created++ {
				repo.CreateSessionState(b, benchutil.SessionOpts{
					StepCount:    3,
					FilesTouched: []string{"src/file_000.go"},
				})
			}
			runSessionStartHook(b, repo)
		})
	}
}

// runSessionStartHook is the shared benchmark loop that invokes the session-start
// hook as a subprocess and reports latency in ms/op.
func runSessionStartHook(b *testing.B, repo *benchutil.BenchRepo) {
b.Helper()

stdinPayload, err := json.Marshal(map[string]string{
"session_id": "bench-session",
"transcript_path": "",
})
if err != nil {
b.Fatalf("marshal stdin: %v", err)
}

binary := getTestBinary()
claudeProjectDir := b.TempDir()

b.ResetTimer()
for range b.N {
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This benchmark uses the older for range b.N pattern instead of the newer b.Loop() pattern that was introduced in Go 1.24. The codebase is using Go 1.25.6 and all other benchmarks in benchutil_test.go consistently use b.Loop().

While for range b.N still works, using b.Loop() is the modern approach and provides better ergonomics. Consider updating this to match the pattern used throughout the codebase for consistency.

Copilot uses AI. Check for mistakes.
start := time.Now()

cmd := exec.Command(binary, "hooks", "claude-code", "session-start")
cmd.Dir = repo.Dir
cmd.Stdin = bytes.NewReader(stdinPayload)
cmd.Env = append(os.Environ(),
"ENTIRE_TEST_CLAUDE_PROJECT_DIR="+claudeProjectDir,
)

output, err := cmd.CombinedOutput()
if err != nil {
b.Fatalf("session-start hook failed: %v\nOutput: %s", err, output)
}

b.ReportMetric(float64(time.Since(start).Milliseconds()), "ms/op")
Comment on lines +142 to +156
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using manual timing with time.Now() and b.ReportMetric alongside b.ResetTimer() creates competing timing mechanisms. When using b.Loop(), the testing framework automatically handles timing, eliminating the need for manual time.Now()/time.Since() calls and b.ReportMetric().

If you switch to b.Loop() as suggested, you can remove the manual timing code (lines 142, 156) and let the framework handle it automatically. The framework's timing is more accurate as it accounts for framework overhead.

Copilot uses AI. Check for mistakes.
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReportMetric in loop only keeps last iteration's value

Medium Severity

b.ReportMetric overwrites any previously reported value for the same unit, so calling it inside the for range b.N loop means only the last iteration's latency is reported as ms/op, discarding all earlier measurements. The total time needs to be accumulated across all iterations and divided by b.N after the loop ends, then reported once with a single b.ReportMetric call. With -benchtime=5x, 4 out of 5 samples are silently thrown away.

Fix in Cursor Fix in Web

}