elastic
diff --git a/‎.github/workflows/benchmarks.yml‎
Lines changed: 5 additions & 1 deletion b/‎.github/workflows/benchmarks.yml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎.github/workflows/benchmarks/README.md‎
Lines changed: 90 additions & 0 deletions b/‎.github/workflows/benchmarks/README.md‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎.github/workflows/benchmarks/analyze-artifacts.sh‎
Lines changed: 10 additions & 0 deletions b/‎.github/workflows/benchmarks/analyze-artifacts.sh‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎.github/workflows/benchmarks/download-artifacts.sh‎
Lines changed: 26 additions & 0 deletions b/‎.github/workflows/benchmarks/download-artifacts.sh‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎.github/workflows/benchmarks/start-workflows.sh‎
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/benchmarks/start-workflows.sh‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎systemtest/benchtest/expvar/metrics.go‎
Lines changed: 1 addition & 1 deletion b/‎systemtest/benchtest/expvar/metrics.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎systemtest/benchtest/main.go‎
Lines changed: 25 additions & 8 deletions b/‎systemtest/benchtest/main.go‎
Lines changed: 25 additions & 8 deletions
diff --git a/‎systemtest/benchtest/profiles.go‎
Lines changed: 2 additions & 3 deletions b/‎systemtest/benchtest/profiles.go‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎testing/benchmark/Makefile‎
Lines changed: 14 additions & 5 deletions b/‎testing/benchmark/Makefile‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎testing/benchmark/main.tf‎
Lines changed: 7 additions & 2 deletions b/‎testing/benchmark/main.tf‎
Lines changed: 7 additions & 2 deletions
@@ -2,7 +2,7 @@ name: benchmarks
 
 # Secret access requires allowlist in https://github.com/elastic/oblt-infra/tree/main/conf/resources/repos/apm-server
 
-# To call benchmarks workflow programmatically with different params, use the `gh workflow run` command.
+# To call benchmarks workflow programmatically with different params, use the scripts in .github/workflows/benchmarks
 
 on:
   workflow_dispatch:
@@ -203,6 +203,10 @@ jobs:
         if: ${{ env.RUN_STANDALONE == 'true' && failure() }}
         run: make cat-apm-server-logs
 
+      - name: Cat moxy logs
+        if: ${{ env.RUN_STANDALONE == 'true' && failure() }}
+        run: make cat-moxy-logs
+
       - name: Index benchmarks result
         run: make index-benchmark-results
 
 
@@ -0,0 +1,90 @@
+# Benchmark Scripts
+
+This directory contains utility scripts for users to run locally to kickstart and analyze APM Server benchmark workflows in different configurations.
+
+The scripts here are used to generate the TBS benchmark docs at https://www.elastic.co/docs/solutions/observability/apm/transaction-sampling#_tail_based_sampling_performance_and_requirements
+
+To generate non-TBS benchmarks, modify the benchmarks workflow inputs in `start-workflows.sh` and use the other scripts as is.
+
+## Workflow
+
+1. **Run benchmarks**: Execute `start-workflows.sh` to trigger multiple benchmark workflow runs on GitHub and save run IDs to `${BENCH_BRANCH}.txt`
+2. **Wait for completion**: Monitor the workflow runs on GitHub Actions until all benchmarks finish
+3. **Download results**: Run `download-artifacts.sh` to fetch all benchmark artifacts using the saved run IDs
+4. **Analyze results**: Run `analyze-artifacts.sh` to generate benchstat analysis for each result
+5. **Summarize for documentation**: Create a summary table from the benchstat results for the documentation (can be done manually or using an LLM to extract geomean values from each `benchstat.txt` file)
+
+```bash
+# Run benchmarks on the tbs-arm-bench-92 branch
+BENCH_BRANCH=tbs-arm-bench-92 ./start-workflows.sh
+
+# Wait for workflows to complete on GitHub Actions
+
+# Download all benchmark results using the saved run IDs
+BENCH_BRANCH=tbs-arm-bench-92 ./download-artifacts.sh
+
+# Analyze the results by producing benchstat
+./analyze-artifacts.sh
+```
+
+## start-workflows.sh
+
+Triggers multiple benchmark workflows on GitHub Actions with different instance configurations.
+
+### Usage
+
+```bash
+BENCH_BRANCH=tbs-arm-bench-92 ./start-workflows.sh
+```
+
+### Environment Variables
+
+- `BENCH_BRANCH` (required): The git branch/ref to run the benchmarks against
+
+### Description
+
+This script triggers 9 benchmark workflow runs with different configurations:
+- Tests 8GB, 16GB, and 32GB instance profiles
+- For each instance profile, compares 3 configurations: TBS disabled, TBS enabled on gp3 (3000 IOPS) storage, and TBS enabled on NVMe storage
+
+The script runs configurations sequentially with 2-second delays between each and automatically saves each workflow run ID to `${BENCH_BRANCH}.txt` for later use with `download-artifacts.sh`.
+
+The script can be modified to run other configurations, e.g. non-TBS.
+
+## download-artifacts.sh
+
+Downloads benchmark result artifacts from completed GitHub workflow runs.
+
+### Usage
+
+```bash
+# Download all benchmark results (after running start-workflows.sh)
+BENCH_BRANCH=tbs-arm-bench-92 ./download-artifacts.sh
+```
+
+### Environment Variables
+
+- `BENCH_BRANCH` (required): The git branch/ref name, used to locate the file containing workflow run IDs
+
+### Input
+
+Reads workflow run IDs from `${BENCH_BRANCH}.txt` file (one per line). Each benchmark result is downloaded to a numbered directory (`benchmark-result-1`, `benchmark-result-2`, etc.).
+
+## analyze-artifacts.sh
+
+Generates benchstat analysis for each downloaded benchmark result.
+
+### Usage
+
+```bash
+./analyze-artifacts.sh
+# Creates benchmark-result-1/benchstat.txt, benchmark-result-2/benchstat.txt, etc.
+```
+
+### Description
+
+For each `benchmark-result-*/benchmark-result.txt` file, runs `benchstat` and saves the output to `benchstat.txt` in the same directory.
+
+### Requirements
+
+- `benchstat` must be installed (`go install golang.org/x/perf/cmd/benchstat@latest`)
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Analyze each benchmark result
+for dir in benchmark-result-*/; do
+  if [ -f "$dir/benchmark-result.txt" ]; then
+    echo "=== Analyzing $dir ==="
+    benchstat "$dir/benchmark-result.txt" > "$dir/benchstat.txt"
+    echo "Saved to $dir/benchstat.txt"
+  fi
+done
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Validate BENCH_BRANCH environment variable
+if [ -z "$BENCH_BRANCH" ]; then
+  echo "Error: BENCH_BRANCH environment variable is not set or is empty"
+  exit 1
+fi
+
+# Read workflow run IDs from ${BENCH_BRANCH}.txt
+if [ ! -f "${BENCH_BRANCH}.txt" ]; then
+  echo "Error: File ${BENCH_BRANCH}.txt not found"
+  exit 1
+fi
+
+mapfile -t run_ids < "${BENCH_BRANCH}.txt"
+
+# Download each benchmark result and rename
+for i in "${!run_ids[@]}"; do
+  run_id="${run_ids[$i]}"
+  result_num=$((i + 1))
+  
+  echo "Downloading run $run_id as benchmark-result-$result_num..."
+  gh run download "$run_id" -n benchmark-result -D "benchmark-result-$result_num"
+done
+
+echo "Done!"
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+# Validate BENCH_BRANCH environment variable
+if [ -z "$BENCH_BRANCH" ]; then
+  echo "Error: BENCH_BRANCH environment variable is not set or is empty. Set it to the branch on which the workflow should run."
+  exit 1
+fi
+
+# 8GB, TBS disabled
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=false -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/8GB_ARM-x1zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 8GB, TBS enabled, gp3 3000IOPS
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/8GB_ARM-x1zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 8GB, TBS enabled, NVMe
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/8GB_NVMe_ARM-x1zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+
+sleep 2
+# 16GB, TBS disabled
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=false -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/16GB_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 16GB, TBS enabled, gp3 3000IOPS
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/16GB_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 16GB, TBS enabled, NVMe
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/16GB_NVMe_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+
+sleep 2
+# 32GB, TBS disabled
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=false -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/32GB_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 32GB, TBS enabled, gp3 3000IOPS
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/32GB_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
+sleep 2
+# 32GB, TBS enabled, NVMe
+gh workflow run benchmarks.yml --ref "$BENCH_BRANCH" -f runStandalone=true -f enableTailSampling=true -f tailSamplingStorageLimit=0 -f tailSamplingSampleRate=0.1 -f profile=system-profiles/32GB_NVMe_ARM-x2zone.tfvars -f pgoExport=false -f benchmarkAgents=1024 -f benchmarkRun=BenchmarkTraces -f warmupTime=5m
+gh run list --workflow=benchmarks.yml --branch "$BENCH_BRANCH" --json 'databaseId' -q '.[0].databaseId' >> "${BENCH_BRANCH}.txt"
@@ -263,7 +263,7 @@ func run(ctx context.Context, serverURL string, period time.Duration) (<-chan ex
 				return
 			case <-ticker.C:
 				var e expvar
-				ctxWithTimeout, cancel := context.WithTimeout(ctx, period)
+				ctxWithTimeout, cancel := context.WithTimeout(ctx, period+5*time.Second)
 				err := queryExpvar(ctxWithTimeout, &e, serverURL)
 				cancel()
 				if err != nil {
 
@@ -36,15 +36,14 @@ import (
 
 	"go.elastic.co/apm/v2/stacktrace"
 	"go.uber.org/zap"
-	"go.uber.org/zap/zaptest"
 	"golang.org/x/time/rate"
 
 	"github.com/elastic/apm-perf/loadgen"
 	loadgencfg "github.com/elastic/apm-perf/loadgen/config"
 	"github.com/elastic/apm-server/systemtest/benchtest/expvar"
 )
 
-const waitInactiveTimeout = 60 * time.Second
+const waitInactiveTimeout = 90 * time.Second
 
 // BenchmarkFunc is the benchmark function type accepted by Run.
 type BenchmarkFunc func(*testing.B, *rate.Limiter)
@@ -56,20 +55,35 @@ type benchmark struct {
 	f    BenchmarkFunc
 }
 
+// getLogger builds a zap development logger that writes to stdout.
+// A standalone logger is used because b.Log does not work inside
+// testing.Benchmark (see https://github.com/golang/go/issues/32066).
+// Writing to stdout keeps log lines from interfering with the benchmark
+// output in stderr.
+func getLogger() (*zap.Logger, error) {
+	cfg := zap.NewDevelopmentConfig()
+	cfg.OutputPaths = []string{"stdout"}
+	return cfg.Build()
+}
+
 func runBenchmark(f BenchmarkFunc) (testing.BenchmarkResult, bool, bool, error) {
+	logger, err := getLogger()
+	if err != nil {
+		return testing.BenchmarkResult{}, false, false, err
+	}
 	// Run the benchmark. testing.Benchmark will invoke the function
 	// multiple times, but only returns the final result.
 	var failed bool
 	var skipped bool
 	var collector *expvar.Collector
+	var reterr error
 	result := testing.Benchmark(func(b *testing.B) {
 		ctx, cancel := context.WithCancel(context.Background())
 		defer cancel()
 		var err error
 		server := loadgencfg.Config.ServerURL.String()
-		collector, err = expvar.StartNewCollector(ctx, server, 100*time.Millisecond, zaptest.NewLogger(b))
+		collector, err = expvar.StartNewCollector(ctx, server, 100*time.Millisecond, logger)
 		if err != nil {
-			b.Error(err)
+			reterr = fmt.Errorf("expvar.StartNewCollector error: %w", err)
+			b.Error(reterr)
 			failed = b.Failed()
 			return
 		}
@@ -93,9 +107,11 @@ func runBenchmark(f BenchmarkFunc) (testing.BenchmarkResult, bool, bool, error)
 		if !b.Failed() {
 			watcher, err := collector.WatchMetric(expvar.ActiveEvents, 0)
 			if err != nil {
-				b.Error(err)
+				reterr = fmt.Errorf("collector.WatchMetric error: %w", err)
+				b.Error(reterr)
 			} else if status := <-watcher; !status {
-				b.Error("failed to wait for APM server to be inactive")
+				reterr = fmt.Errorf("failed to wait for APM server to be inactive")
+				b.Error(reterr)
 			}
 		}
 		failed = b.Failed()
@@ -104,7 +120,7 @@ func runBenchmark(f BenchmarkFunc) (testing.BenchmarkResult, bool, bool, error)
 	if result.Extra != nil {
 		addExpvarMetrics(&result, collector, benchConfig.Detailed)
 	}
-	return result, failed, skipped, nil
+	return result, failed, skipped, reterr
 }
 
 func addExpvarMetrics(result *testing.BenchmarkResult, collector *expvar.Collector, detailed bool) {
@@ -234,7 +250,8 @@ func Run(allBenchmarks ...BenchmarkFunc) error {
 				profileChan := profiles.record(name)
 				result, failed, skipped, err := runBenchmark(benchmark.f)
 				if err != nil {
-					return err
+					fmt.Fprintf(os.Stderr, "--- FAIL: %s\n", name)
+					return fmt.Errorf("benchmark %q failed: %w", name, err)
 				}
 				if skipped {
 					continue
 
@@ -44,8 +44,7 @@ func fetchProfile(urlPath string, duration time.Duration) (*profile.Profile, err
 		query.Set("seconds", strconv.Itoa(int(duration.Seconds())))
 		req.URL.RawQuery = query.Encode()
 
-		timeout := time.Duration(float64(duration) * 1.5)
-		ctx := req.Context()
+		timeout := duration * 3
 		ctx, cancel := context.WithTimeout(req.Context(), timeout)
 		defer cancel()
 		req = req.WithContext(ctx)
@@ -108,7 +107,7 @@ func (p *profiles) recordCPU() error {
 		}
 		// We don't need the address in the profile, so discard it to reduce the size.
 		if err := profile.Aggregate(true, true, true, true, false); err != nil {
-			return fmt.Errorf("failed to fetch CPU profile: %w", err)
+			return fmt.Errorf("failed to aggregate CPU profile: %w", err)
 		}
 		profile = profile.Compact()
 		p.cpu = append(p.cpu, profile)
 
@@ -1,14 +1,18 @@
+TFVARS_SOURCE ?= terraform.tfvars.example
+
+# Automatically detect architecture based on instance type in TFVARS_SOURCE:
+# the first matching instance-type setting decides; Graviton families map to
+# arm64, anything else (including no match) to amd64.
+# If we need to change this, remember to grep for other instance type checks in the codebase.
+# Simply expanded (:=) so the shell pipeline runs once at parse time instead of
+# on every expansion of the *_GOARCH variables that default to this value.
+DETECTED_ARCH := $(shell grep -E "standalone_apm_server_instance_size|standalone_moxy_instance_size|worker_instance_type" $(TFVARS_SOURCE) | head -1 | cut -d'=' -f2 | tr -d ' "' | grep -qE '^(a1|t4g|c6g|c7g|m6g|m7g|r6g|r7g|x2gd)' && echo "arm64" || echo "amd64")
+
 APMBENCH_PATH ?= ../../systemtest/cmd/apmbench
 APMBENCH_GOOS ?= linux
-APMBENCH_GOARCH ?= amd64
+APMBENCH_GOARCH ?= $(DETECTED_ARCH)
 
 MOXY_GOOS ?= linux
-MOXY_GOARCH ?= amd64
+MOXY_GOARCH ?= $(DETECTED_ARCH)
 
 APM_SERVER_GOOS ?= linux
-APM_SERVER_GOARCH ?= amd64
-
-TFVARS_SOURCE ?= terraform.tfvars.example
+APM_SERVER_GOARCH ?= $(DETECTED_ARCH)
 
 BENCHMARK_WARMUP_TIME ?= 5m
 BENCHMARK_AGENTS ?= 64
@@ -137,6 +141,11 @@ endif
 cat-apm-server-logs:
 	@ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(APM_SERVER_IP) "cat /var/log/apm-server/*"
 
+# cat-moxy-logs prints moxy.log from the moxy host over SSH.
+# The host address is read from the `moxy_ip` terraform output when make expands
+# the recipe, and is stored in the make variable MOXY_IP via $(eval ...).
+# NOTE(review): "moxy.log" is a relative path, so it is presumably resolved
+# against the SSH user's login directory on the remote host; confirm.
+.PHONY: cat-moxy-logs
+cat-moxy-logs:
+	$(eval MOXY_IP = $(shell terraform output -raw moxy_ip))
+	@ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(MOXY_IP) "cat moxy.log"
+
 $(SSH_KEY):
 	@ssh-keygen -t rsa -b 4096 -C "$(USER)@elastic.co" -N "" -f $(SSH_KEY)
 
 
@@ -44,6 +44,10 @@ provider "aws" {
 
 locals {
   name_prefix = "${coalesce(var.user_name, "unknown-user")}-bench"
+
+  # Detect if standalone APM server instance type is ARM (Graviton) based
+  # If we need to change this, remember to grep for other instance type checks in the codebase.
+  standalone_apm_is_arm = can(regex("^(a1|t4g|c6g|c7g|m6g|m7g|r6g|r7g|x2gd)", var.standalone_apm_server_instance_size))
 }
 
 module "vpc" {
@@ -153,8 +157,9 @@ module "standalone_apm_server" {
   count  = var.run_standalone ? 1 : 0
   source = "../infra/terraform/modules/standalone_apm_server"
 
-  vpc_id              = module.vpc.vpc_id
-  aws_os              = "al2023-ami-2023.*-x86_64"
+  vpc_id = module.vpc.vpc_id
+  # Use appropriate AMI pattern based on instance architecture
+  aws_os              = local.standalone_apm_is_arm ? "al2023-ami-2023" : "al2023-ami-2023.*-x86_64"
   apm_instance_type   = var.standalone_apm_server_instance_size
   apm_volume_type     = var.standalone_apm_server_volume_type
   apm_volume_size     = var.apm_server_tail_sampling ? coalesce(var.standalone_apm_server_volume_size, 60) : var.standalone_apm_server_volume_size