Skip to content

Commit

Permalink
sweet: don't sleep until cockroachdb cluster workload is ready
Browse files Browse the repository at this point in the history
Currently the CockroachDB benchmark uses a time.Sleep to wait until the
cluster workload is ready, but I suspect this is flaky. Instead, run the
benchmarking tool with a very short duration to detect whether the
cluster is ready.

Change-Id: Ie79c930442e0a7d8c5b1d9951974472f50178fd9
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/615418
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
  • Loading branch information
mknyszek authored and gopherbot committed Sep 24, 2024
1 parent ffe4d68 commit fdb6f75
Showing 1 changed file with 28 additions and 11 deletions.
39 changes: 28 additions & 11 deletions sweet/benchmarks/cockroachdb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ type benchmark struct {
args []string
longArgs []string // if !config.short
shortArgs []string // if config.short
pingArgs []string
metricTypes []string
timeout time.Duration
}
Expand Down Expand Up @@ -367,7 +368,7 @@ func kvBenchmark(readPercent int, nodeCount int) benchmark {
"--max-block-bytes=1024",
"--concurrency=10000",
"--max-rate=30000",
//Pre-splitting and scattering the ranges should help stabilize results.
// Pre-splitting and scattering the ranges should help stabilize results.
"--scatter",
"--splits=5",
},
Expand All @@ -381,6 +382,11 @@ func kvBenchmark(readPercent int, nodeCount int) benchmark {
"--ramp=5s",
"--duration=30s",
},
// Just to ping whether the workload is ready.
pingArgs: []string{
"--ramp=0s",
"--duration=500ms",
},
}
}

Expand All @@ -407,17 +413,28 @@ func runBenchmark(b *driver.B, cfg *config, instances []*cockroachdbInstance) (e
var stdout, stderr bytes.Buffer
initCmd.Stdout = &stdout
initCmd.Stderr = &stderr
if err = initCmd.Run(); err != nil {
if err := initCmd.Run(); err != nil {
return err
}

log.Println("sleeping")

// If we try and start the workload right after loading in the schema
// it will spam us with database does not exist errors. We could repeatedly
// retry until the database exists by parsing the output, or we can just
// wait 5 seconds.
time.Sleep(5 * time.Second)
// Make sure the server is ready to accept work by pinging it with very short
// benchmark runs. If they fail, we assume that the server isn't ready.
log.Println("pinging server with benchmark tool")
pingArgs := cfg.bench.args
pingArgs = append(pingArgs, cfg.bench.pingArgs...)
pingArgs = append(pingArgs, pgurls...)
pingCmd := exec.Command(cfg.cockroachdbBin, pingArgs...)
pingStart := time.Now()
var pingOutput []byte
var pingErr error
for time.Now().Sub(pingStart) < 30*time.Second {
if pingOutput, pingErr = pingCmd.CombinedOutput(); pingErr == nil {
break
}
}
if pingErr != nil {
return fmt.Errorf("workload failed to become available within timeout: error: %v: output:\n%s", pingErr, pingOutput)
}

args := cfg.bench.args
if cfg.short {
Expand All @@ -427,7 +444,7 @@ func runBenchmark(b *driver.B, cfg *config, instances []*cockroachdbInstance) (e
}
args = append(args, pgurls...)

log.Println("running benchmark timeout")
log.Println("running benchmark tool")
cmd := exec.Command(cfg.cockroachdbBin, args...)
fmt.Fprintln(os.Stderr, cmd.String())

Expand All @@ -446,7 +463,7 @@ func runBenchmark(b *driver.B, cfg *config, instances []*cockroachdbInstance) (e
var benchmarkErr error
go func() {
b.ResetTimer()
if err = cmd.Run(); err != nil {
if err := cmd.Run(); err != nil {
benchmarkErr = err
}
b.StopTimer()
Expand Down

0 comments on commit fdb6f75

Please sign in to comment.