From 18f70589475686d058b8a3cdea736c670dfb6d30 Mon Sep 17 00:00:00 2001 From: Arpit Temani Date: Sun, 27 Oct 2024 12:13:54 +0530 Subject: [PATCH] draft for benchmarking --- .../workflows/cron_jerigon_zero_testing.yml | 49 +++++---- scripts/jerigon_zero_benchmark.sh | 94 ++++++++++++------ {test-data => test_data}/erigon-data.tar.gz | Bin 3 files changed, 90 insertions(+), 53 deletions(-) mode change 100644 => 100755 scripts/jerigon_zero_benchmark.sh rename {test-data => test_data}/erigon-data.tar.gz (100%) diff --git a/.github/workflows/cron_jerigon_zero_testing.yml b/.github/workflows/cron_jerigon_zero_testing.yml index 12b9a3d99..c3fa42044 100644 --- a/.github/workflows/cron_jerigon_zero_testing.yml +++ b/.github/workflows/cron_jerigon_zero_testing.yml @@ -6,7 +6,7 @@ on: # # Run every Sunday at 12:00 AM (UTC) # - cron: "0 0 * * 0" push: - branches: [develop, main] + branches: [develop] pull_request: branches: - "**" @@ -48,39 +48,44 @@ jobs: - name: Run erigon network run: | cd .. - tar xf "$(pwd)/zk_evm/test-data/erigon-data.tar.gz" + tar xf "$(pwd)/zk_evm/test_data/erigon-data.tar.gz" docker pull ghcr.io/0xpolygonzero/erigon:feat-zero docker run -d -p 8545:8545 -v $(pwd):/data \ ghcr.io/0xpolygonzero/erigon:feat-zero \ --datadir=/data/erigon/execution-data --http.api=eth,erigon,engine,web3,net,debug,trace,txpool,admin \ --http.vhosts=* --ws --http --http.addr=0.0.0.0 --http.corsdomain=* --http.port=8545 \ --metrics --metrics.addr=0.0.0.0 --metrics.port=9001 --db.size.limit=3000MB + export ETH_RPC_URL="http://localhost:8545" - # TODO - Decide the number of blocks we want to test with + # TODO - Decide the number of blocks we want to test with. Currently choosing any 1 random blocks - name: Regression test with zero tracer in real mode run: | - export ETH_RPC_URL="http://localhost:8545" rm -rf proofs/* circuits/* ./proofs.json test.out verify.out leader.out - random_numbers=($(shuf -i 1-500 -n 10)) + random_numbers=($(shuf -i 1-500 -n 1)) for number in "${random_numbers[@]}"; do - echo $number hex_number="0x$(echo "obase=16; $number" | bc)" - echo $hex_number - OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh $hex_number $hex_number $ETH_RPC_URL jerigon true 3000 100 + OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh $hex_number $hex_number $ETH_RPC_URL jerigon true 3000 100 test_only done - # - name: Download previous results for becnhmarking - # uses: dawidd6/action-download-artifact@v6 - # with: - # workflow: cron_jerigon_zero_testing.yml - # workflow_conclusion: success - # name: jerigon_zero_testing_benchmark - # path: ./ - # if_no_artifact_found: ignore + - name: Download previous results + uses: dawidd6/action-download-artifact@v6 + with: + workflow: cron_jerigon_zero_testing.yml + workflow_conclusion: success + name: jerigon_zero_benchmark + path: ./ + if_no_artifact_found: ignore + + # TODO - Put more stats in the output file + - name: Run the benchmark script + run: | + ./scripts/jerigon_zero_benchmark.sh - # - name: Benchmarking test with zero tracer in real mode - # run: | - # export ETH_RPC_URL="http://localhost:8545" - # rm -rf proofs/* circuits/* ./proofs.json test.out verify.out leader.out - # echo "Running the benchmarking script..." - # ./scripts/jerigon_zero_benchmark.sh | tee benchmark_output.log + - name: Upload new results + uses: actions/upload-artifact@v4 + with: + name: jerigon_zero_benchmark + path: | + ./jerigon_zero_output.log + retention-days: 90 + overwrite: true diff --git a/scripts/jerigon_zero_benchmark.sh b/scripts/jerigon_zero_benchmark.sh old mode 100644 new mode 100755 index ba00a9abe..abcbe8f17 --- a/scripts/jerigon_zero_benchmark.sh +++ b/scripts/jerigon_zero_benchmark.sh @@ -2,56 +2,88 @@ # ------------------------------------------------------------------------------ set -exo pipefail -# We're going to set the parallelism in line with the total cpu count +# Args: +# 1 --> Output file (Not used in the current script) + +# Get the number of processors for parallelism if [[ "$OSTYPE" == "darwin"* ]]; then num_procs=$(sysctl -n hw.physicalcpu) else num_procs=$(nproc) fi -# Force the working directory to always be the `tools/` directory. +# Force the working directory to always be the repository root. REPO_ROOT=$(git rev-parse --show-toplevel) PROOF_OUTPUT_DIR="${REPO_ROOT}/proofs" - BLOCK_BATCH_SIZE="${BLOCK_BATCH_SIZE:-8}" -echo "Block batch size: $BLOCK_BATCH_SIZE" -OUTPUT_LOG="${REPO_ROOT}/output.log" +# Logging setup +OUTPUT_LOG="jerigon_zero_output.log" +BLOCK_OUTPUT_LOG="jerigon_zero_block_output.log" PROOFS_FILE_LIST="${PROOF_OUTPUT_DIR}/proof_files.json" -TEST_OUT_PATH="${REPO_ROOT}/test.out" -# Configured Rayon and Tokio with rough defaults +# Ensure necessary directories exist +mkdir -p "$PROOF_OUTPUT_DIR" + +# Set environment variables for parallelism and logging export RAYON_NUM_THREADS=$num_procs export TOKIO_WORKER_THREADS=$num_procs - export RUST_MIN_STACK=33554432 export RUST_BACKTRACE=full export RUST_LOG=info -INPUT_FILE=$1 +# Log the current date and time +echo "$(date +"%Y-%m-%d %H:%M:%S")" &>> "$OUTPUT_LOG" -if [[ $INPUT_FILE == "" ]]; then - echo "Please provide witness json input file, e.g. artifacts/witness_b19240705.json" - exit 1 -fi +# Define the blocks to process +blocks=(1 2) -start_time=$(date +%s%N) -perf stat -e cycles "${REPO_ROOT}/target/release/leader" --runtime in-memory --load-strategy monolithic --block-batch-size $BLOCK_BATCH_SIZE \ - --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE &> $OUTPUT_LOG -end_time=$(date +%s%N) - -set +o pipefail -cat $OUTPUT_LOG | grep "Successfully wrote to disk proof file " | awk '{print $NF}' | tee $PROOFS_FILE_LIST -if [ ! -s "$PROOFS_FILE_LIST" ]; then - # Some error occurred, display the logs and exit. - cat $OUTPUT_LOG - echo "Proof list not generated, some error happened. For more details check the log file $OUTPUT_LOG" - exit 1 -fi +# Function to process each block +process_block() { + local block=$1 + + echo "Processing block: $block" &>> "$OUTPUT_LOG" + + # Fetch block data + if ! ./target/release/rpc --rpc-url "$ETH_RPC_URL" fetch --start-block "$block" --end-block "$block" > "output_${block}.json"; then + echo "Failed to fetch block data for block: $block" &>> "$OUTPUT_LOG" + exit 1 + fi + + local start_time=$(date +%s%N) + + # Run performance stats + if ! perf stat -e cycles ./target/release/leader --test-only --runtime in-memory --load-strategy monolithic --block-batch-size "$BLOCK_BATCH_SIZE" --proof-output-dir "$PROOF_OUTPUT_DIR" stdio < "output_${block}.json" &> "$BLOCK_OUTPUT_LOG"; then + echo "Performance command failed for block: $block" &>> "$OUTPUT_LOG" + cat "$BLOCK_OUTPUT_LOG" &>> "$OUTPUT_LOG" + exit 1 + fi + + local end_time=$(date +%s%N) + + set +o pipefail + if ! cat "$BLOCK_OUTPUT_LOG" | grep "Successfully wrote to disk proof file " | awk '{print $NF}' | tee "$PROOFS_FILE_LIST"; then + echo "Proof list not generated for block: $block. Check the log for details." &>> "$OUTPUT_LOG" + cat "$BLOCK_OUTPUT_LOG" &>> "$OUTPUT_LOG" + exit 1 + fi + + local duration_sec=$(echo "scale=3; ($end_time - $start_time) / 1000000000" | bc -l) + + # Extract performance timings + local PERF_TIME=$(grep "seconds time elapsed" "$BLOCK_OUTPUT_LOG" | tail -1 | awk '{ print ($1)}') + local PERF_USER_TIME=$(grep "seconds user" "$BLOCK_OUTPUT_LOG" | tail -1 | awk '{ print ($1)}') + local PERF_SYS_TIME=$(grep "seconds sys" "$BLOCK_OUTPUT_LOG" | tail -1 | awk '{ print ($1)}') + + echo "Success for block: $block!" + echo "Proving duration for block $block: $duration_sec seconds, performance time: $PERF_TIME, performance user time: $PERF_USER_TIME, performance system time: $PERF_SYS_TIME" &>> "$OUTPUT_LOG" +} -duration_ns=$((end_time - start_time)) -duration_sec=$(echo "$duration_ns / 1000000000" | bc -l) +# Process each block +for block in "${blocks[@]}"; do + process_block "$block" +done -echo "Success!" -echo "Proving duration:" $duration_sec " seconds" -echo "Note, this duration is inclusive of circuit handling and overall process initialization"; \ No newline at end of file +# Finalize logging +echo "Processing completed at: $(date +"%Y-%m-%d %H:%M:%S")" &>> "$OUTPUT_LOG" +echo "" &>> "$OUTPUT_LOG" diff --git a/test-data/erigon-data.tar.gz b/test_data/erigon-data.tar.gz similarity index 100% rename from test-data/erigon-data.tar.gz rename to test_data/erigon-data.tar.gz