From a6b90dbe7cbe9de60847cdb539316c7a384273c7 Mon Sep 17 00:00:00 2001
From: DarrylWong
Date: Thu, 23 May 2024 17:10:52 -0400
Subject: [PATCH] cockroachdb: add benchmarks

This change adds a benchmark that spins up a local CRDB cluster
and runs the kv workload against it.

Change-Id: I69684e5de3dc93d4c5ddeed84294fc3ed376b991
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/588135
LUCI-TryBot-Result: Go LUCI
Auto-Submit: Michael Knyszek
Reviewed-by: Michael Pratt
Reviewed-by: Michael Knyszek
---
 sweet/benchmarks/cockroachdb/main.go       | 647 +++++++++++++++++++++
 sweet/benchmarks/internal/server/server.go |   5 +
 sweet/cmd/sweet/benchmark.go               |   7 +
 sweet/cmd/sweet/integration_test.go        |   1 +
 sweet/common/gotool.go                     |   5 +-
 sweet/harnesses/cockroachdb.go             | 185 ++++++
 sweet/harnesses/common.go                  |  12 +
 7 files changed, 860 insertions(+), 2 deletions(-)
 create mode 100644 sweet/benchmarks/cockroachdb/main.go
 create mode 100644 sweet/harnesses/cockroachdb.go

diff --git a/sweet/benchmarks/cockroachdb/main.go b/sweet/benchmarks/cockroachdb/main.go
new file mode 100644
index 0000000..b3794d6
--- /dev/null
+++ b/sweet/benchmarks/cockroachdb/main.go
@@ -0,0 +1,647 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !wasm
+
+package main
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"os/exec"
+	"regexp"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"time"
+
+	"golang.org/x/benchmarks/sweet/benchmarks/internal/driver"
+	"golang.org/x/benchmarks/sweet/benchmarks/internal/server"
+	"golang.org/x/benchmarks/sweet/common/diagnostics"
+)
+
+const (
+	// Arbitrarily chosen to match the cockroachdb default.
+	basePort = 26257
+	// The fraction of memory to allocate to the pebble cache (0.25 = 25%).
+	cacheSize = "0.25"
+)
+
+type config struct {
+	host           string
+	cockroachdbBin string
+	tmpDir         string
+	benchName      string
+	isProfiling    bool
+	short          bool
+	procsPerInst   int
+	bench          *benchmark
+}
+
+var cliCfg config
+
+func init() {
+	driver.SetFlags(flag.CommandLine)
+	flag.StringVar(&cliCfg.host, "host", "localhost", "hostname of cockroachdb server")
+	flag.StringVar(&cliCfg.cockroachdbBin, "cockroachdb-bin", "", "path to cockroachdb binary")
+	flag.StringVar(&cliCfg.tmpDir, "tmp", "", "path to temporary directory")
+	flag.StringVar(&cliCfg.benchName, "bench", "", "name of the benchmark to run")
+	flag.BoolVar(&cliCfg.short, "short", false, "whether to run a short version of this benchmark")
+}
+
+type cockroachdbInstance struct {
+	name     string
+	sqlPort  int // Used for intra-cluster communication.
+	httpPort int // Used to scrape for metrics.
+	cmd      *exec.Cmd
+	output   bytes.Buffer
+}
+
+func clusterAddresses(instances []*cockroachdbInstance) string {
+	var s []string
+	for _, inst := range instances {
+		s = append(s, inst.sqlAddr())
+	}
+	return strings.Join(s, ",")
+}
+
+func launchSingleNodeCluster(cfg *config) ([]*cockroachdbInstance, error) {
+	var instances []*cockroachdbInstance
+	instances = append(instances, &cockroachdbInstance{
+		name:     "roach-node",
+		sqlPort:  basePort,
+		httpPort: basePort + 1,
+	})
+	inst := instances[0]
+
+	// `cockroach start-single-node` handles both creation of the node
+	// and initialization.
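+	//
+	// For reference only, the command constructed below comes out roughly
+	// as follows (assuming the default -host of localhost, the default
+	// basePort of 26257, and a store directory under -tmp):
+	//
+	//	cockroach start-single-node --insecure \
+	//		--listen-addr localhost:26257 --http-addr localhost:26258 \
+	//		--cache 0.25 --store $TMP/roach-node --logtostderr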
+	inst.cmd = exec.Command(cfg.cockroachdbBin,
+		"start-single-node",
+		"--insecure",
+		"--listen-addr", inst.sqlAddr(),
+		"--http-addr", inst.httpAddr(),
+		"--cache", cacheSize,
+		"--store", fmt.Sprintf("%s/%s", cfg.tmpDir, inst.name),
+		"--logtostderr",
+	)
+	inst.cmd.Env = append(os.Environ(),
+		fmt.Sprintf("GOMAXPROCS=%d", cfg.procsPerInst),
+	)
+	inst.cmd.Stdout = &inst.output
+	inst.cmd.Stderr = &inst.output
+	if err := inst.cmd.Start(); err != nil {
+		return nil, fmt.Errorf("failed to start instance %q: %v", inst.name, err)
+	}
+	return instances, nil
+}
+
+func launchCockroachCluster(cfg *config) ([]*cockroachdbInstance, error) {
+	if cfg.bench.nodeCount == 1 {
+		// Use `cockroach start-single-node` instead for single node clusters.
+		return launchSingleNodeCluster(cfg)
+	}
+	var instances []*cockroachdbInstance
+	for i := 0; i < cfg.bench.nodeCount; i++ {
+		instances = append(instances, &cockroachdbInstance{
+			name:     fmt.Sprintf("roach-node-%d", i+1),
+			sqlPort:  basePort + 2*i,
+			httpPort: basePort + 2*i + 1,
+		})
+	}
+
+	// Start the instances with `cockroach start`.
+	for n, inst := range instances {
+		allOtherInstances := append(instances[:n:n], instances[n+1:]...)
+		join := fmt.Sprintf("--join=%s", clusterAddresses(allOtherInstances))
+
+		inst.cmd = exec.Command(cfg.cockroachdbBin,
+			"start",
+			"--insecure",
+			"--listen-addr", inst.sqlAddr(),
+			"--http-addr", inst.httpAddr(),
+			"--cache", cacheSize,
+			"--store", fmt.Sprintf("%s/%s", cfg.tmpDir, inst.name),
+			"--logtostderr",
+			join,
+		)
+		inst.cmd.Env = append(os.Environ(),
+			fmt.Sprintf("GOMAXPROCS=%d", cfg.procsPerInst),
+		)
+		inst.cmd.Stdout = &inst.output
+		inst.cmd.Stderr = &inst.output
+		if err := inst.cmd.Start(); err != nil {
+			return nil, fmt.Errorf("failed to start instance %q: %v", inst.name, err)
+		}
+	}
+
+	// Initialize the cluster with `cockroach init`.
+	inst1 := instances[0]
+	initCmd := exec.Command(cfg.cockroachdbBin,
+		"init",
+		"--insecure",
+		fmt.Sprintf("--host=%s", cfg.host),
+		fmt.Sprintf("--port=%d", inst1.sqlPort),
+	)
+	initCmd.Env = append(os.Environ(),
+		fmt.Sprintf("GOMAXPROCS=%d", cfg.procsPerInst),
+	)
+	initCmd.Stdout = &inst1.output
+	initCmd.Stderr = &inst1.output
+	if err := initCmd.Start(); err != nil {
+		return nil, fmt.Errorf("failed to init instance %q: %v", inst1.name, err)
+	}
+	return instances, nil
+}
+
+// waitForCluster pings nodes in the cluster until one responds, or
+// we time out. We only care to wait for one node to respond as the
+// workload will work as long as it can connect to one node initially.
+// The --ramp flag will take care of startup noise.
+func waitForCluster(instances []*cockroachdbInstance, cfg *config) error {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	for _, inst := range instances {
+		inst := inst
+		go func(ctx context.Context) {
+			for {
+				select {
+				case <-ctx.Done():
+					return
+				case <-time.After(5 * time.Second):
+					// The node will almost certainly not be ready right away, so wait
+					// 5 seconds first and between pings. 5 seconds was chosen through
+					// trial and error as a time that nodes are *usually* ready by.
+					if err := inst.ping(cfg); err == nil {
+						cancel()
+						return
+					}
+				}
+			}
+		}(ctx)
+	}
+
+	select {
+	case <-ctx.Done():
+	case <-time.After(time.Minute):
+		return errors.New("benchmark timed out waiting for cluster to be ready")
+	}
+
+	return nil
+}
+
+func (i *cockroachdbInstance) setClusterSettings(cfg *config) error {
+	settings := []string{
+		// Disable admission control.
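+		// Each entry below is applied by the loop that follows as a single
+		// statement of the form (illustrative):
+		//
+		//	SET CLUSTER SETTING admission.kv.enabled = false;
+		//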
+		// Cockroach normally patches the go runtime to track cpu nanos per goroutine,
+		// which is used by admission control. However, these benchmarks are run without
+		// said observability change, and it is unsupported and untested whether admission
+		// control works in this case. It might be fine, but to be safe, we disable it here.
+		"admission.kv.enabled = false",
+		"admission.sql_kv_response.enabled = false",
+		"admission.sql_sql_response.enabled = false",
+	}
+
+	// Multi-line cluster setting changes aren't allowed.
+	for _, setting := range settings {
+		cmd := exec.Command(cfg.cockroachdbBin,
+			"sql",
+			"--insecure",
+			fmt.Sprintf("--host=%s", cfg.host),
+			fmt.Sprintf("--port=%d", i.sqlPort),
+			"--execute", fmt.Sprintf("SET CLUSTER SETTING %s;", setting),
+		)
+		cmd.Stdout = &i.output
+		cmd.Stderr = &i.output
+		if err := cmd.Run(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (i *cockroachdbInstance) ping(cfg *config) error {
+	// Ping the node to see if it is live. The actual command of
+	// `node status` is a bit arbitrary; if it responds at all,
+	// we know the node is live. Generally it is used to see if
+	// *other* nodes are live.
+	cmd := exec.Command(cfg.cockroachdbBin,
+		"node",
+		"status",
+		"--insecure",
+		fmt.Sprintf("--host=%s", cfg.host),
+		fmt.Sprintf("--port=%d", i.sqlPort),
+	)
+	cmd.Stdout = &i.output
+	cmd.Stderr = &i.output
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (i *cockroachdbInstance) sqlAddr() string {
+	return fmt.Sprintf("%s:%d", cliCfg.host, i.sqlPort)
+}
+
+func (i *cockroachdbInstance) httpAddr() string {
+	return fmt.Sprintf("%s:%d", cliCfg.host, i.httpPort)
+}
+
+func (i *cockroachdbInstance) shutdown() error {
+	// Only attempt to shut down the process if we got to a point
+	// that a command was constructed and started.
+	if i.cmd != nil && i.cmd.Process != nil {
+		// Send SIGTERM and wait instead of just killing, as sending SIGKILL
+		// bypasses node shutdown logic and will leave the node in a bad state.
+		// Normally not an issue unless you want to restart the cluster, i.e.
+		// to poke around and see what's wrong.
+		if err := i.cmd.Process.Signal(syscall.SIGTERM); err != nil {
+			return err
+		}
+		if _, err := i.cmd.Process.Wait(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+type benchmark struct {
+	name        string
+	reportName  string
+	workload    string
+	nodeCount   int
+	args        []string
+	longArgs    []string // if !config.short
+	shortArgs   []string // if config.short
+	metricTypes []string
+	timeout     time.Duration
+}
+
+const (
+	readMetric  = "read"
+	writeMetric = "write"
+)
+
+func kvBenchmark(readPercent int, nodeCount int) benchmark {
+	metricTypes := []string{writeMetric}
+	if readPercent > 0 {
+		metricTypes = append(metricTypes, readMetric)
+	}
+
+	return benchmark{
+		name:        fmt.Sprintf("kv%d/nodes=%d", readPercent, nodeCount),
+		reportName:  fmt.Sprintf("CockroachDBkv%d/nodes=%d", readPercent, nodeCount),
+		workload:    "kv",
+		nodeCount:   nodeCount,
+		metricTypes: metricTypes,
+		// Very generous timeout; we don't expect to ever hit this, but just in case.
+		timeout: 5 * time.Minute,
+		args: []string{
+			"workload", "run", "kv",
+			fmt.Sprintf("--read-percent=%d", readPercent),
+			"--min-block-bytes=1024",
+			"--max-block-bytes=1024",
+			"--concurrency=10000",
+			"--max-rate=30000",
+			// Pre-splitting and scattering the ranges should help stabilize results.
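+			// (As the flag names suggest: --splits pre-splits the workload's
+			// table into this many ranges up front, and --scatter randomly
+			// redistributes those ranges across the cluster before the run.)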
+ "--scatter", + "--splits=5", + }, + longArgs: []string{ + "--ramp=15s", + "--duration=1m", + }, + // Chosen through trial and error as the shortest time that doesn't + // give extremely fluctuating results. + shortArgs: []string{ + "--ramp=5s", + "--duration=30s", + }, + } +} + +var benchmarks = []benchmark{ + kvBenchmark(0 /* readPercent */, 1 /* nodeCount */), + kvBenchmark(0 /* readPercent */, 3 /* nodeCount */), + kvBenchmark(50 /* readPercent */, 1 /* nodeCount */), + kvBenchmark(50 /* readPercent */, 3 /* nodeCount */), + kvBenchmark(95 /* readPercent */, 1 /* nodeCount */), + kvBenchmark(95 /* readPercent */, 3 /* nodeCount */), +} + +func runBenchmark(b *driver.B, cfg *config, instances []*cockroachdbInstance) (err error) { + var pgurls []string + for _, inst := range instances { + host := inst.sqlAddr() + pgurls = append(pgurls, fmt.Sprintf(`postgres://root@%s?sslmode=disable`, host)) + } + // Load in the schema needed for the workload via `workload init` + initArgs := []string{"workload", "init", cfg.bench.workload} + initArgs = append(initArgs, pgurls...) + initCmd := exec.Command(cfg.cockroachdbBin, initArgs...) + var stdout, stderr bytes.Buffer + initCmd.Stdout = &stdout + initCmd.Stderr = &stderr + if err = initCmd.Run(); err != nil { + return err + } + + // If we try and start the workload right after loading in the schema + // it will spam us with database does not exist errors. We could repeatedly + // retry until the database exists by parsing the output, or we can just + // wait 5 seconds. + time.Sleep(5 * time.Second) + + args := cfg.bench.args + if cfg.short { + args = append(args, cfg.bench.shortArgs...) + } else { + args = append(args, cfg.bench.longArgs...) + } + args = append(args, pgurls...) + + cmd := exec.Command(cfg.cockroachdbBin, args...) 
+	fmt.Fprintln(os.Stderr, cmd.String())
+
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	cmd.Env = append(os.Environ(), fmt.Sprintf("GOMAXPROCS=%d", cfg.procsPerInst))
+
+	defer func() {
+		if err != nil && stderr.Len() != 0 {
+			fmt.Fprintln(os.Stderr, "=== Benchmarking tool stderr ===")
+			fmt.Fprintln(os.Stderr, stderr.String())
+		}
+	}()
+
+	finished := make(chan bool, 1)
+	var benchmarkErr error
+	go func() {
+		b.ResetTimer()
+		if err := cmd.Run(); err != nil {
+			benchmarkErr = err
+		}
+		b.StopTimer()
+		finished <- true
+	}()
+
+	select {
+	case <-finished:
+	case <-time.After(cfg.bench.timeout):
+		if err := cmd.Process.Kill(); err != nil {
+			return fmt.Errorf("error killing benchmark process, benchmark timed out: %w", err)
+		}
+		return errors.New("benchmark timed out")
+	}
+
+	if benchmarkErr != nil {
+		return benchmarkErr
+	}
+
+	return reportFromBenchmarkOutput(b, cfg, stdout.String())
+}
+
+func reportFromBenchmarkOutput(b *driver.B, cfg *config, output string) (err error) {
+	defer func() {
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "=== Benchmarking tool output ===")
+			fmt.Fprintln(os.Stderr, output)
+		}
+	}()
+
+	for _, metricType := range cfg.bench.metricTypes {
+		err = getAndReportMetrics(b, metricType, output)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+type benchmarkMetrics struct {
+	totalOps       uint64
+	opsPerSecond   uint64
+	averageLatency uint64
+	p50Latency     uint64
+	p95Latency     uint64
+	p99Latency     uint64
+	p100Latency    uint64
+}
+
+func getAndReportMetrics(b *driver.B, metricType string, output string) error {
+	metrics, err := getMetrics(metricType, output)
+	if err != nil {
+		return err
+	}
+	reportMetrics(b, metricType, metrics)
+	return nil
+}
+
+func getMetrics(metricType string, output string) (benchmarkMetrics, error) {
+	re := regexp.MustCompile(fmt.Sprintf(`.*(__total)\n.*%s`, metricType))
+	match := re.FindString(output)
+	if len(match) == 0 {
+		return benchmarkMetrics{}, fmt.Errorf("failed to find %s metrics in output", metricType)
+	}
+	match = strings.Split(match, "\n")[1]
+	fields := strings.Fields(match)
+
+	stringToUint64 := func(field string) (uint64, error) {
+		number, err := strconv.ParseFloat(field, 64)
+		if err != nil {
+			return 0, fmt.Errorf("error parsing metrics to uint64: %w", err)
+		}
+		return uint64(number), nil
+	}
+
+	uint64Fields := make([]uint64, len(fields[2:])-1)
+	for i := range uint64Fields {
+		var err error
+		uint64Fields[i], err = stringToUint64(fields[2+i])
+		if err != nil {
+			return benchmarkMetrics{}, err
+		}
+	}
+
+	metrics := benchmarkMetrics{
+		totalOps:       uint64Fields[0],
+		opsPerSecond:   uint64Fields[1],
+		averageLatency: uint64Fields[2] * 1e6,
+		p50Latency:     uint64Fields[3] * 1e6,
+		p95Latency:     uint64Fields[4] * 1e6,
+		p99Latency:     uint64Fields[5] * 1e6,
+		p100Latency:    uint64Fields[6] * 1e6,
+	}
+	return metrics, nil
+}
+
+func reportMetrics(b *driver.B, metricType string, metrics benchmarkMetrics) {
+	b.Report(fmt.Sprintf("%s-ops/sec", metricType), metrics.opsPerSecond)
+	b.Report(fmt.Sprintf("%s-ops", metricType), metrics.totalOps)
+	b.Report(fmt.Sprintf("%s-ns/op", metricType), metrics.averageLatency)
+	b.Report(fmt.Sprintf("%s-p50-latency-ns", metricType), metrics.p50Latency)
+	b.Report(fmt.Sprintf("%s-p95-latency-ns", metricType), metrics.p95Latency)
+	b.Report(fmt.Sprintf("%s-p99-latency-ns", metricType), metrics.p99Latency)
+	b.Report(fmt.Sprintf("%s-p100-latency-ns", metricType), metrics.p100Latency)
+}
+
+func run(cfg *config) (err error) {
+	var instances []*cockroachdbInstance
+	// Launch the server.
+	instances, err = launchCockroachCluster(cfg)
+	if err != nil {
+		return fmt.Errorf("starting cluster: %v", err)
+	}
+
+	// Clean up the cluster after we're done.
+	defer func() {
+		// We only need to send SIGTERM to one instance; attempting to
+		// send it again will cause it to hang.
+		inst := instances[0]
+		if r := inst.shutdown(); r != nil {
+			if err == nil {
+				err = r
+			} else {
+				fmt.Fprintf(os.Stderr, "failed to shut down %s: %v", inst.name, r)
+			}
+		}
+		if err != nil && inst.output.Len() != 0 {
+			fmt.Fprintf(os.Stderr, "=== Instance %q stdout+stderr ===\n", inst.name)
+			fmt.Fprintln(os.Stderr, inst.output.String())
+		}
+	}()
+
+	if err = waitForCluster(instances, cfg); err != nil {
+		return err
+	}
+
+	if err = instances[0].setClusterSettings(cfg); err != nil {
+		return err
+	}
+
+	opts := []driver.RunOption{
+		driver.DoPeakRSS(true),
+		driver.DoPeakVM(true),
+		driver.DoDefaultAvgRSS(),
+		driver.DoCoreDump(true),
+		driver.BenchmarkPID(instances[0].cmd.Process.Pid),
+		driver.DoPerf(true),
+	}
+	return driver.RunBenchmark(cfg.bench.reportName, func(d *driver.B) error {
+		// Set up diagnostics.
+		var finishers []func() uint64
+		if driver.DiagnosticEnabled(diagnostics.CPUProfile) {
+			for _, inst := range instances {
+				finishers = append(finishers, server.PollDiagnostic(
+					inst.httpAddr(),
+					cfg.tmpDir,
+					cfg.bench.reportName,
+					diagnostics.CPUProfile,
+				))
+			}
+		}
+		if driver.DiagnosticEnabled(diagnostics.Trace) {
+			var sum atomic.Uint64
+			for _, inst := range instances {
+				stopTrace := server.PollDiagnostic(
+					inst.httpAddr(),
+					cfg.tmpDir,
+					cfg.bench.reportName,
+					diagnostics.Trace,
+				)
+				finishers = append(finishers, func() uint64 {
+					n := stopTrace()
+					sum.Add(n)
+					return n
+				})
+			}
+			defer func() {
+				d.Report("trace-bytes", sum.Load())
+			}()
+		}
+		if driver.DiagnosticEnabled(diagnostics.MemProfile) {
+			for _, inst := range instances {
+				inst := inst
+				finishers = append(finishers, func() uint64 {
+					n, err := server.CollectDiagnostic(
+						inst.httpAddr(),
+						cfg.tmpDir,
+						cfg.bench.reportName,
+						diagnostics.MemProfile,
+					)
+					if err != nil {
+						fmt.Fprintf(os.Stderr, "failed to read memprofile: %v", err)
+					}
+					return uint64(n)
+				})
+			}
+		}
+		if len(finishers) != 0 {
+			// Finish all the diagnostic collections concurrently; otherwise
+			// we could be waiting a while.
+			defer func() {
+				var wg sync.WaitGroup
+				for _, finish := range finishers {
+					finish := finish
+					wg.Add(1)
+					go func() {
+						defer wg.Done()
+						finish()
+					}()
+				}
+				wg.Wait()
+			}()
+		}
+		// Actually run the benchmark.
+		return runBenchmark(d, cfg, instances)
+	}, opts...)
+}
+
+func main() {
+	flag.Parse()
+	if flag.NArg() != 0 {
+		fmt.Fprintf(os.Stderr, "error: unexpected args\n")
+		os.Exit(1)
+	}
+	for _, typ := range diagnostics.Types() {
+		cliCfg.isProfiling = cliCfg.isProfiling || driver.DiagnosticEnabled(typ)
+	}
+	for i := range benchmarks {
+		if benchmarks[i].name == cliCfg.benchName {
+			cliCfg.bench = &benchmarks[i]
+			break
+		}
+	}
+	if cliCfg.bench == nil {
+		fmt.Fprintf(os.Stderr, "error: unknown benchmark %q\n", cliCfg.benchName)
+		os.Exit(1)
+	}
+
+	// We're going to launch a bunch of cockroachdb instances. Distribute
+	// GOMAXPROCS between those and ourselves equally.
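+	// For example, with GOMAXPROCS=16 and a 3-node cluster, each node and
+	// this benchmark process get 16/(3+1) = 4 procs; fractional shares
+	// round down, and anything below 1 is bumped back up to 1.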
+	procs := runtime.GOMAXPROCS(-1)
+	procsPerInst := procs / (cliCfg.bench.nodeCount + 1)
+	if procsPerInst == 0 {
+		procsPerInst = 1
+	}
+	runtime.GOMAXPROCS(procsPerInst)
+	cliCfg.procsPerInst = procsPerInst
+
+	if err := run(&cliCfg); err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(1)
+	}
+}
diff --git a/sweet/benchmarks/internal/server/server.go b/sweet/benchmarks/internal/server/server.go
index 7fc6859..75fe10b 100644
--- a/sweet/benchmarks/internal/server/server.go
+++ b/sweet/benchmarks/internal/server/server.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"net/http"
 	"os"
+	"strings"
 	"sync"
 
 	"golang.org/x/benchmarks/sweet/benchmarks/internal/driver"
@@ -16,6 +17,10 @@ import (
 )
 
 func CollectDiagnostic(host, tmpDir, benchName string, typ diagnostics.Type) (int64, error) {
+	// We attempt to use the benchmark name to create a temp file, so replace all
+	// path separators with "_".
+	benchName = strings.Replace(benchName, "/", "_", -1)
+	benchName = strings.Replace(benchName, string(os.PathSeparator), "_", -1)
 	f, err := os.CreateTemp(tmpDir, benchName+"."+string(typ))
 	if err != nil {
 		return 0, err
diff --git a/sweet/cmd/sweet/benchmark.go b/sweet/cmd/sweet/benchmark.go
index 482d6a7..51ff0af 100644
--- a/sweet/cmd/sweet/benchmark.go
+++ b/sweet/cmd/sweet/benchmark.go
@@ -39,6 +39,12 @@ var allBenchmarks = []benchmark{
 		harness:     harnesses.BleveIndex(),
 		generator:   generators.BleveIndex(),
 	},
+	{
+		name:        "cockroachdb",
+		description: "Distributed database",
+		harness:     harnesses.CockroachDB{},
+		generator:   generators.None{},
+	},
 	{
 		name:        "etcd",
 		description: "Distributed key-value store",
@@ -92,6 +98,7 @@ var benchmarkGroups = func() map[string][]*benchmark {
 		allBenchmarksMap["biogo-igor"],
 		allBenchmarksMap["biogo-krishna"],
 		allBenchmarksMap["bleve-index"],
+		allBenchmarksMap["cockroachdb"],
 		allBenchmarksMap["etcd"],
 		allBenchmarksMap["go-build"],
 		allBenchmarksMap["gopher-lua"],
diff --git a/sweet/cmd/sweet/integration_test.go b/sweet/cmd/sweet/integration_test.go
index efba40d..f223e10 100644
--- a/sweet/cmd/sweet/integration_test.go
+++ b/sweet/cmd/sweet/integration_test.go
@@ -196,6 +196,7 @@ func TestSweetEndToEnd(t *testing.T) {
 		{"go-build", 4},
 		{"biogo-igor", 1},
 		{"biogo-krishna", 1},
+		{"cockroachdb", 1},
 		{"etcd", 1},
 		{"bleve-index", 1},
 		{"gopher-lua", 1},
diff --git a/sweet/common/gotool.go b/sweet/common/gotool.go
index 14d3d58..08dd1a7 100644
--- a/sweet/common/gotool.go
+++ b/sweet/common/gotool.go
@@ -72,7 +72,7 @@ func (g *Go) BuildPackage(pkg, out string) error {
 	return g.Do("", "build", "-o", out, pkg)
 }
 
-func (g *Go) BuildPath(path, out string) error {
+func (g *Go) BuildPath(path, out string, args ...string) error {
 	if path[0] != '/' && path[0] != '.' {
 		path = "./" + path
 	}
@@ -84,7 +84,8 @@ func (g *Go) BuildPath(path, out string) error {
 	if err := chdir(path); err != nil {
 		return fmt.Errorf("failed to enter build directory: %w", err)
 	}
-	return g.Do("", "build", "-o", out)
+	args = append([]string{"build", "-o", out}, args...)
+	return g.Do("", args...)
 }
 
 func chdir(path string) error {
diff --git a/sweet/harnesses/cockroachdb.go b/sweet/harnesses/cockroachdb.go
new file mode 100644
index 0000000..126c5df
--- /dev/null
+++ b/sweet/harnesses/cockroachdb.go
@@ -0,0 +1,185 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package harnesses
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"time"
+
+	"golang.org/x/benchmarks/sweet/common"
+	"golang.org/x/benchmarks/sweet/common/log"
+)
+
+// CockroachDB implements the Harness interface.
+type CockroachDB struct{}
+
+func (h CockroachDB) CheckPrerequisites() error {
+	// CockroachDB is only supported on arm64 and amd64 architectures.
+	if runtime.GOARCH != "arm64" && runtime.GOARCH != "amd64" {
+		return fmt.Errorf("requires amd64 or arm64")
+	}
+	return nil
+}
+
+func (h CockroachDB) Get(gcfg *common.GetConfig) error {
+	// Build against a commit that includes https://github.com/cockroachdb/cockroach/pull/125588.
+	// Recursively clone the repo, as we need certain submodules, e.g.
+	// PROJ, for the build to work.
+	return gitRecursiveCloneToCommit(
+		gcfg.SrcDir,
+		"https://github.com/cockroachdb/cockroach",
+		"master",
+		"c4a0d997e0da6ba3ebede61b791607aa452b9bbc",
+	)
+}
+
+func (h CockroachDB) Build(cfg *common.Config, bcfg *common.BuildConfig) error {
+	// Build the cockroach binary.
+	// We do this by using the cockroach `dev` tool. The dev tool is a bazel
+	// wrapper normally used for building cockroach, but can also be used to
+	// generate artifacts that can then be built by `go build`.
+
+	// Install bazel via bazelisk, which is used by `dev`. Install it in the
+	// BinDir to ensure we get a new copy every run and avoid reuse. This is
+	// done by setting the `GOBIN` env var for the `go install` cmd.
+	goInstall := cfg.GoTool()
+	goInstall.Env = goInstall.Env.MustSet(fmt.Sprintf("GOBIN=%s", bcfg.BinDir))
+	if err := goInstall.Do(bcfg.BinDir, "install", "github.com/bazelbuild/bazelisk@latest"); err != nil {
+		return fmt.Errorf("error building bazelisk: %v", err)
+	}
+
+	// Helper that returns the path to the bazel binary.
+	bazel := func() string {
+		return filepath.Join(bcfg.BinDir, "bazelisk")
+	}
+
+	// Clean up the bazel workspace. If we don't do this, our _bazel directory
+	// will quickly grow as Bazel treats each run as its own workspace with its
+	// own artifacts.
+	defer func() {
+		cmd := exec.Command(bazel(), "clean", "--expunge")
+		cmd.Dir = bcfg.SrcDir
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		// Cleanup is best effort; there might not be anything to clean up
+		// if we fail early enough in the build process.
+		_ = cmd.Run()
+	}()
+
+	// Configure the build env.
+	env := cfg.BuildEnv.Env
+	env = env.Prefix("PATH", filepath.Join(cfg.GoRoot, "bin")+":")
+	env = env.MustSet("GOROOT=" + cfg.GoRoot)
+
+	// Use bazel to generate the artifacts needed to enable a `go build`.
+	cmd := exec.Command(bazel(), "run", "//pkg/gen:code")
+	cmd.Dir = bcfg.SrcDir
+	cmd.Env = env.Collapse()
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+
+	// Build the c-deps needed.
+	cmd = exec.Command(bazel(), "run", "//pkg/cmd/generate-cgo:generate-cgo", "--run_under", fmt.Sprintf("cd %s && ", bcfg.SrcDir))
+	cmd.Dir = bcfg.SrcDir
+	cmd.Env = env.Collapse()
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+
+	// Finally build the cockroach binary with `go build`. Build the
+	// cockroach-short binary as it is functionally the same, but
+	// without the UI, making it much quicker to build.
+	//
+	// As of go1.23, we need to pass the `-ldflags=-checklinkname=0` flag
+	// to build cockroach. However, benchmark release branches are on older
+	// versions that don't recognize the flag. Try first with the flag and
+	// again without if there is an error.
+	if buildWithFlagErr := cfg.GoTool().BuildPath(filepath.Join(bcfg.SrcDir, "pkg/cmd/cockroach-short"), bcfg.BinDir, "-ldflags=-checklinkname=0"); buildWithFlagErr != nil {
+		if buildWithoutFlagErr := cfg.GoTool().BuildPath(filepath.Join(bcfg.SrcDir, "pkg/cmd/cockroach-short"), bcfg.BinDir); buildWithoutFlagErr != nil {
+			return errors.Join(buildWithFlagErr, buildWithoutFlagErr)
+		}
+	}
+
+	// Copy the binary from cockroach-short to cockroach for
+	// ease of use.
+	if err := copyFile(filepath.Join(bcfg.BinDir, "cockroach"), filepath.Join(bcfg.BinDir, "cockroach-short")); err != nil {
+		return err
+	}
+
+	// Build the benchmark wrapper.
+	if err := cfg.GoTool().BuildPath(bcfg.BenchDir, filepath.Join(bcfg.BinDir, "cockroachdb-bench")); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (h CockroachDB) Run(cfg *common.Config, rcfg *common.RunConfig) error {
+	benchmarks := []string{"kv0/nodes=1", "kv50/nodes=1", "kv95/nodes=1", "kv0/nodes=3", "kv50/nodes=3", "kv95/nodes=3"}
+	if rcfg.Short {
+		benchmarks = []string{"kv0/nodes=3", "kv95/nodes=3"}
+	}
+
+	for _, bench := range benchmarks {
+		args := append(rcfg.Args, []string{
+			"-bench", bench,
+			"-cockroachdb-bin", filepath.Join(rcfg.BinDir, "cockroach"),
+			"-tmp", rcfg.TmpDir,
+		}...)
+		if rcfg.Short {
+			args = append(args, "-short")
+		}
+		// The short benchmarks take about 1 minute to run.
+		// The long benchmarks take about 10 minutes to run.
+		// We set the timeout to 15 minutes to give some buffer.
+		cmd := exec.Command(
+			filepath.Join(rcfg.BinDir, "cockroachdb-bench"),
+			args...,
+		)
+		cmd.Env = cfg.ExecEnv.Collapse()
+		cmd.Stdout = rcfg.Results
+		cmd.Stderr = rcfg.Results
+		log.TraceCommand(cmd, false)
+		if err := cmd.Start(); err != nil {
+			return err
+		}
+		if rcfg.Short {
+			if err := cmd.Wait(); err != nil {
+				return err
+			}
+		} else {
+			// Wait for up to 15 minutes.
+			c := make(chan error)
+			go func() {
+				c <- cmd.Wait()
+			}()
+			select {
+			case <-c:
+			case <-time.After(15 * time.Minute):
+				if err := cmd.Process.Kill(); err != nil {
+					return fmt.Errorf("timeout, error killing process: %s", err.Error())
+				}
+				return fmt.Errorf("timeout")
+			}
+		}
+
+		// Delete tmp because cockroachdb will have written something there and
+		// might attempt to reuse it. We don't want to reuse the same cluster.
+		if err := rmDirContents(rcfg.TmpDir); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/sweet/harnesses/common.go b/sweet/harnesses/common.go
index 92b4dd8..0318167 100644
--- a/sweet/harnesses/common.go
+++ b/sweet/harnesses/common.go
@@ -20,6 +20,18 @@ func gitShallowClone(dir, url, ref string) error {
 	return err
 }
 
+func gitRecursiveCloneToCommit(dir, url, branch, hash string) error {
+	cloneCmd := exec.Command("git", "clone", "--recursive", "--shallow-submodules", "-b", branch, url, dir)
+	log.TraceCommand(cloneCmd, false)
+	if _, err := cloneCmd.Output(); err != nil {
+		return err
+	}
+	checkoutCmd := exec.Command("git", "-C", dir, "checkout", hash)
+	log.TraceCommand(checkoutCmd, false)
+	_, err := checkoutCmd.Output()
+	return err
+}
+
 func gitCloneToCommit(dir, url, branch, hash string) error {
 	cloneCmd := exec.Command("git", "clone", "-b", branch, url, dir)
 	log.TraceCommand(cloneCmd, false)