mgomes · mgomes · Feb 23, 2026 · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -5,6 +5,8 @@ on:
     branches: [master]
   pull_request:
   workflow_dispatch:
+  schedule:
+    - cron: "0 6 * * 1"  # 06:00 every Monday; GitHub Actions evaluates schedules in UTC
 
 jobs:
   benchmark:
@@ -18,23 +20,71 @@ jobs:
         with:
           go-version-file: go.mod
 
+      - name: Configure benchmark profile
+        run: |
+          # Scheduled runs use the "full" profile; every other event uses "pr".
+          case "${{ github.event_name }}" in
+            schedule) profile=full bench_time=2s ;;
+            *)        profile=pr   bench_time=1s ;;
+          esac
+          {
+            echo "BENCH_COUNT=1"
+            echo "BENCH_TIME=$bench_time"
+            echo "BENCH_BASELINE=benchmarks/baselines/v0.20.0-$profile.txt"
+            echo "BENCH_PROFILE=$profile"
+          } >> "$GITHUB_ENV"
+
+      - name: Benchmark smoke gates
+        if: github.event_name != 'schedule'  # skipped on scheduled runs
+        run: |
+          set -o pipefail  # let a failing smoke check fail the step despite the tee
+          mkdir -p .bench
+          ./scripts/bench_smoke_check.sh | tee .bench/smoke.txt  # copy kept for the summary and artifact
+
       - name: Run benchmarks
         run: |
           mkdir -p .bench
-          go test ./vibes -run '^$' -bench '^BenchmarkExecution' -benchmem | tee .bench/benchmark.txt
+          scripts/bench_runtime.sh \
+            --count "$BENCH_COUNT" \
+            --benchtime "$BENCH_TIME" \
+            --out ".bench/benchmark-$BENCH_PROFILE.txt"  # BENCH_* are exported by "Configure benchmark profile"
+
+      - name: Compare benchmark trend against baseline
+        run: |
+          set -o pipefail  # otherwise the step reports tee's status and hides a failed compare
+          ./scripts/bench_compare_baseline.sh "$BENCH_BASELINE" \
+            ".bench/benchmark-$BENCH_PROFILE.txt" | tee ".bench/trend-$BENCH_PROFILE.txt"
 
       - name: Publish benchmark summary
         run: |
           {
-            echo "## Benchmark Results"
+            if [[ -f .bench/smoke.txt ]]; then  # smoke.txt is absent on scheduled runs, where the smoke step is skipped
+              echo "## Benchmark Smoke Gates"
+              echo ""
+              echo '```text'
+              cat .bench/smoke.txt
+              echo '```'
+              echo ""
+            fi
+            echo "## Benchmark Trend vs Baseline ($BENCH_PROFILE)"
+            echo ""
+            echo '```text'
+            cat ".bench/trend-$BENCH_PROFILE.txt"  # written by the compare step's tee
+            echo '```'
+            echo ""
+            echo "## Benchmark Results ($BENCH_PROFILE)"
             echo ""
             echo '```text'
-            cat .bench/benchmark.txt
+            cat ".bench/benchmark-$BENCH_PROFILE.txt"  # written by bench_runtime.sh via --out
             echo '```'
           } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload benchmark artifact
+        if: ${{ !cancelled() }}  # still upload whatever was produced when an earlier gate or benchmark step fails
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results
-          path: .bench/benchmark.txt
+          name: benchmark-results-${{ github.event_name }}-${{ github.run_number }}
+          path: |
+            .bench/benchmark-*.txt
+            .bench/trend-*.txt
+            .bench/smoke.txt
+          if-no-files-found: warn
diff --git a/Justfile b/Justfile
@@ -4,7 +4,10 @@ test:
 	go test ./...
 
+# Run scripts/bench_runtime.sh with no arguments, i.e. with the script's default flags.
 bench:
-	go test ./vibes -run '^$' -bench '^BenchmarkExecution' -benchmem
+	scripts/bench_runtime.sh
+
+# Run scripts/bench_profile.sh for benchmarks matching `pattern` (defaults to the array-pipeline benchmark).
+bench-profile pattern='^BenchmarkExecutionArrayPipeline$':
+	scripts/bench_profile.sh --pattern "{{pattern}}"
 
 lint:
 	gofmt -l . | (! read)

diff --git a/ROADMAP.md b/ROADMAP.md
@@ -367,3 +367,54 @@ Goal: lock the language and embedding API for long-term support.
 - [x] Zero known P0/P1 correctness bugs.
 - [x] CI green across supported platforms and Go versions.
 - [x] Release process rehearsed and repeatable.
+
+---
+
+## v0.20.0 - Performance and Benchmarking (1.0 Push)
+
+Goal: make performance improvements measurable, repeatable, and protected against regressions.
+
+### Runtime Performance
+
+- [x] Profile evaluator hotspots and prioritize top 3 CPU paths by cumulative time.
+- [x] Reduce `Script.Call` overhead for short-running scripts (frame/env setup and teardown).
+- [x] Optimize method dispatch and member access fast paths.
+- [x] Reduce allocations in common collection transforms (`map`, `select`, `reduce`, `chunk`, `window`).
+- [x] Optimize typed argument/return validation for nested composite types.
+
+### Memory and Allocation Discipline
+
+- [x] Reduce transient allocations in stdlib JSON/Regex/String helper paths.
+- [x] Reduce temporary map/array churn in module and capability boundary code paths.
+- [x] Add per-benchmark allocation targets (`allocs/op`) for hot runtime paths.
+- [x] Add focused regression tests for high-allocation call patterns.
+
+### Benchmark Coverage
+
+- [x] Expand benchmark suite for compile, call, control-flow, and typed-runtime workloads.
+- [x] Add capability-heavy benchmarks (db/events/context adapters + contract validation).
+- [x] Add module-system benchmarks (`require`, cache hits, cache misses, cycle paths).
+- [x] Add stdlib benchmarks for JSON/Regex/Time/String/Array/Hash hot operations.
+- [x] Add representative end-to-end benchmarks using `tests/complex/*.vibe` workloads.
+
+### Benchmark Tooling and CI
+
+- [x] Add a single benchmark runner command/script with stable flags and output format.
+- [x] Persist benchmark baselines in versioned artifacts for release comparison.
+- [x] Add PR-time benchmark smoke checks with threshold-based alerts.
+- [x] Add scheduled full benchmark runs with trend reporting.
+- [x] Document benchmark interpretation and triage workflow.
+
+### Profiling and Diagnostics
+
+- [x] Add reproducible CPU profile capture workflow for compile and runtime benchmarks.
+- [x] Add memory profile capture workflow for allocation-heavy scenarios.
+- [x] Add flamegraph generation instructions and hotspot triage checklist.
+- [x] Add a short "performance playbook" for validating optimizations before merge.
+
+### v0.20.0 Definition of Done
+
+- [x] Benchmarks cover runtime, capability, module, and stdlib hot paths.
+- [x] CI reports benchmark deltas for guarded smoke benchmarks.
+- [x] Measurable improvements are achieved before the v1.0.0 release tag.
+- [x] Performance and benchmarking workflows are documented and maintainable.
diff --git a/benchmarks/baselines/README.md b/benchmarks/baselines/README.md
@@ -0,0 +1,26 @@
+# Benchmark Baselines
+
+This directory stores versioned benchmark baseline artifacts for release
+comparison.
+
+## Files
+
+- `v0.20.0-pr.txt`: baseline for PR/push benchmark profile.
+  - Generated with: `scripts/bench_runtime.sh --count 1 --benchtime 1s`
+- `v0.20.0-full.txt`: baseline for scheduled full benchmark profile.
+  - Generated with: `scripts/bench_runtime.sh --count 1 --benchtime 2s`
+
+## Usage
+
+Compare a current run (for example, one written with `scripts/bench_runtime.sh --out benchmarks/latest.txt`) against a baseline:
+
+```bash
+scripts/bench_compare_baseline.sh benchmarks/baselines/v0.20.0-pr.txt benchmarks/latest.txt
+```
+
+## Updating Baselines
+
+1. Run `scripts/bench_runtime.sh` for the target release with the same `--count`/`--benchtime` settings as the profile being replaced (see Files).
+2. Write output to a new versioned file in this directory.
+3. Keep prior baseline files for historical comparison.
+4. Update workflow/docs references if the active baseline changes.
diff --git a/benchmarks/baselines/v0.20.0-full.txt b/benchmarks/baselines/v0.20.0-full.txt
@@ -0,0 +1,41 @@
+# VibeScript benchmark run
+# timestamp: 2026-02-21T18:34:37Z
+# git_commit: cbe5e6a
+# go_version: go version go1.26.0 darwin/arm64
+# package: ./vibes
+# bench_pattern: ^Benchmark
+# count: 1
+# benchtime: 2s
+# cpu: 1
+# command: go test ./vibes -run ^$ -bench ^Benchmark -benchmem -count 1 -benchtime 2s -cpu 1
+
+goos: darwin
+goarch: arm64
+pkg: github.com/mgomes/vibescript/vibes
+cpu: Apple M1 Max
+BenchmarkExecutionArithmeticLoop         	    1971	   1041766 ns/op	    9144 B/op	     553 allocs/op
+BenchmarkExecutionArrayPipeline          	     139	  17398372 ns/op	  928432 B/op	   15350 allocs/op
+BenchmarkExecutionMethodDispatchLoop     	     841	   2829313 ns/op	  199664 B/op	    2779 allocs/op
+BenchmarkExecutionCapabilityFindLoop     	     541	   4433641 ns/op	  395376 B/op	    5571 allocs/op
+BenchmarkExecutionJSONParseLoop          	    2347	   1093538 ns/op	  206192 B/op	    3723 allocs/op
+BenchmarkExecutionJSONStringifyLoop      	    3628	    663761 ns/op	  102724 B/op	    2464 allocs/op
+BenchmarkExecutionRegexReplaceAllLoop    	    3571	    732541 ns/op	  157950 B/op	    2750 allocs/op
+BenchmarkExecutionTallyLoop              	     697	   3442165 ns/op	 2252168 B/op	    1097 allocs/op
+BenchmarkCompileControlFlowWorkload      	  180141	     13414 ns/op	    9664 B/op	     180 allocs/op
+BenchmarkCompileTypedWorkload            	  128024	     18808 ns/op	   12616 B/op	     230 allocs/op
+BenchmarkCompileMassiveWorkload          	   12726	    191940 ns/op	  144816 B/op	    2243 allocs/op
+BenchmarkCallShortScript                 	  427030	      5683 ns/op	    3128 B/op	      25 allocs/op
+BenchmarkCallControlFlowWorkload         	    3946	    622062 ns/op	    5128 B/op	      52 allocs/op
+BenchmarkCallTypedCompositeValidation    	     268	   8891817 ns/op	 5259743 B/op	   55330 allocs/op
+BenchmarkExecutionCapabilityWorkflowLoop 	     270	   8306004 ns/op	 1100952 B/op	   12304 allocs/op
+BenchmarkExecutionTimeParseFormatLoop    	    3877	    656296 ns/op	   33144 B/op	     830 allocs/op
+BenchmarkExecutionStringNormalizeLoop    	    2944	    826423 ns/op	   72824 B/op	    2190 allocs/op
+BenchmarkExecutionHashTransformLoop      	     702	   3594562 ns/op	  534168 B/op	    4736 allocs/op
+BenchmarkModuleRequireCacheHit           	   42799	     56288 ns/op	   16032 B/op	     151 allocs/op
+BenchmarkModuleRequireCacheMiss          	   35266	     67874 ns/op	   20080 B/op	     195 allocs/op
+BenchmarkModuleRequireCyclePath          	   42578	     58229 ns/op	   17136 B/op	     200 allocs/op
+BenchmarkComplexRunAnalytics             	    6595	    317337 ns/op	   35584 B/op	     797 allocs/op
+BenchmarkComplexRunTyped                 	   38694	     63427 ns/op	   10032 B/op	      88 allocs/op
+BenchmarkComplexRunMassive               	     598	   3825029 ns/op	  192832 B/op	    1266 allocs/op
+PASS
+ok  	github.com/mgomes/vibescript/vibes	68.686s
diff --git a/benchmarks/baselines/v0.20.0-pr.txt b/benchmarks/baselines/v0.20.0-pr.txt
@@ -0,0 +1,41 @@
+# VibeScript benchmark run
+# timestamp: 2026-02-21T18:31:59Z
+# git_commit: cbe5e6a
+# go_version: go version go1.26.0 darwin/arm64
+# package: ./vibes
+# bench_pattern: ^Benchmark
+# count: 1
+# benchtime: 1s
+# cpu: 1
+# command: go test ./vibes -run ^$ -bench ^Benchmark -benchmem -count 1 -benchtime 1s -cpu 1
+
+goos: darwin
+goarch: arm64
+pkg: github.com/mgomes/vibescript/vibes
+cpu: Apple M1 Max
+BenchmarkExecutionArithmeticLoop         	     990	   1041161 ns/op	    9144 B/op	     553 allocs/op
+BenchmarkExecutionArrayPipeline          	      72	  17167602 ns/op	  928433 B/op	   15350 allocs/op
+BenchmarkExecutionMethodDispatchLoop     	     430	   2768843 ns/op	  199664 B/op	    2779 allocs/op
+BenchmarkExecutionCapabilityFindLoop     	     272	   4467332 ns/op	  395376 B/op	    5571 allocs/op
+BenchmarkExecutionJSONParseLoop          	    1134	   1093307 ns/op	  206192 B/op	    3723 allocs/op
+BenchmarkExecutionJSONStringifyLoop      	    1914	    664214 ns/op	  102724 B/op	    2464 allocs/op
+BenchmarkExecutionRegexReplaceAllLoop    	    1718	    736594 ns/op	  157950 B/op	    2750 allocs/op
+BenchmarkExecutionTallyLoop              	     348	   3444825 ns/op	 2252168 B/op	    1097 allocs/op
+BenchmarkCompileControlFlowWorkload      	   89504	     13904 ns/op	    9664 B/op	     180 allocs/op
+BenchmarkCompileTypedWorkload            	   61092	     19634 ns/op	   12616 B/op	     230 allocs/op
+BenchmarkCompileMassiveWorkload          	    6313	    191390 ns/op	  144816 B/op	    2243 allocs/op
+BenchmarkCallShortScript                 	  206752	      5783 ns/op	    3128 B/op	      25 allocs/op
+BenchmarkCallControlFlowWorkload         	    1987	    617088 ns/op	    5128 B/op	      52 allocs/op
+BenchmarkCallTypedCompositeValidation    	     134	   8886666 ns/op	 5259744 B/op	   55330 allocs/op
+BenchmarkExecutionCapabilityWorkflowLoop 	     134	   8222791 ns/op	 1100952 B/op	   12304 allocs/op
+BenchmarkExecutionTimeParseFormatLoop    	    1954	    662865 ns/op	   33144 B/op	     830 allocs/op
+BenchmarkExecutionStringNormalizeLoop    	    1449	    833651 ns/op	   72824 B/op	    2190 allocs/op
+BenchmarkExecutionHashTransformLoop      	     348	   3596853 ns/op	  534168 B/op	    4736 allocs/op
+BenchmarkModuleRequireCacheHit           	   21824	     56145 ns/op	   16032 B/op	     151 allocs/op
+BenchmarkModuleRequireCacheMiss          	   17610	     68171 ns/op	   20080 B/op	     195 allocs/op
+BenchmarkModuleRequireCyclePath          	   21298	     55919 ns/op	   17136 B/op	     200 allocs/op
+BenchmarkComplexRunAnalytics             	    4160	    310447 ns/op	   35584 B/op	     797 allocs/op
+BenchmarkComplexRunTyped                 	   19564	     62612 ns/op	   10032 B/op	      88 allocs/op
+BenchmarkComplexRunMassive               	     303	   4060923 ns/op	  192832 B/op	    1266 allocs/op
+PASS
+ok  	github.com/mgomes/vibescript/vibes	36.595s
diff --git a/benchmarks/hotspots_v0.20.0.md b/benchmarks/hotspots_v0.20.0.md
@@ -0,0 +1,33 @@
+# v0.20.0 Hotspot Priorities
+
+Profile run date: 2026-02-21
+
+Command:
+
+```bash
+scripts/bench_profile.sh \
+  --pattern '^(BenchmarkCallShortScript|BenchmarkExecutionCapabilityFindLoop|BenchmarkExecutionArrayPipeline)$' \
+  --benchtime 1s
+```
+
+CPU profile source: `benchmarks/profiles/v0.20.0-hotspots/cpu.top.txt`
+
+## Top 3 CPU Paths (by cumulative time)
+
+1. `(*Execution).estimateMemoryUsage` (~49.77% cumulative)
+2. `(*memoryEstimator).env` (~46.03% cumulative)
+3. `(*memoryEstimator).value` (~35.75% cumulative)
+
+## Supporting allocation signals
+
+From `benchmarks/profiles/v0.20.0-hotspots/mem.top.txt`:
+
+- `newExecutionForCall` is the largest allocator (~35.55% alloc space).
+- `newEnvWithCapacity` is the second largest allocator (~24.87% alloc space).
+- `(*Env).Define` remains a top allocator (~6.96% alloc space).
+
+## Priority order for next optimization passes
+
+1. Cut memory-estimation traversal cost (`estimateMemoryUsage` + estimator walkers).
+2. Reduce env/map churn in call setup (`newExecutionForCall`, `newEnvWithCapacity`, `Env.Define`).
+3. Re-check capability-path call overhead after call-setup optimizations.
diff --git a/benchmarks/smoke_thresholds.txt b/benchmarks/smoke_thresholds.txt
@@ -0,0 +1,18 @@
+# Benchmark smoke thresholds for CI guardrails.
+# Format:
+# <benchmark_name> <max_ns_per_op> <max_allocs_per_op>
+#
+# Keep thresholds intentionally loose enough to avoid platform flake,
+# but tight enough to catch obvious regressions.
+
+BenchmarkExecutionArithmeticLoop 5000000 1000
+BenchmarkExecutionArrayPipeline 30000000 20000
+BenchmarkExecutionMethodDispatchLoop 8000000 6000
+BenchmarkExecutionCapabilityFindLoop 12000000 9000
+BenchmarkExecutionCapabilityWorkflowLoop 15000000 15000
+BenchmarkExecutionJSONParseLoop 3000000 5000
+BenchmarkExecutionJSONStringifyLoop 3000000 4000
+BenchmarkExecutionRegexReplaceAllLoop 4000000 4000
+BenchmarkExecutionTallyLoop 10000000 2000
+BenchmarkCallShortScript 20000 60
+BenchmarkModuleRequireCacheHit 120000 220