diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 1b6e7cda0..c9d205851 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -70,6 +70,8 @@ jobs:
         with:
           tool: 'customBiggerIsBetter'
           output-file-path: results/output.json
+          # This is the default value, just making it more explicit
+          benchmark-data-dir-path: dev/bench
           alert-threshold: "200%"
           fail-on-alert: true
           # GitHub API token to make a commit comment
@@ -78,6 +80,20 @@ jobs:
           auto-push: ${{ inputs.publish }}
           comment-on-alert: true
           max-items-in-chart: 20
+      - name: Check resource utilization
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: 'customSmallerIsBetter'
+          output-file-path: results/peak_mem_usage.json
+          benchmark-data-dir-path: dev/bench/peak_mem_usage
+          alert-threshold: "200%"
+          fail-on-alert: false
+          # GitHub API token to make a commit comment
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          # Store the results and deploy GitHub pages automatically if the results are from main branch
+          auto-push: ${{ inputs.publish }}
+          comment-on-alert: true
+          max-items-in-chart: 20

   latency-bench:
     name: Benchmark (Latency)
@@ -188,3 +204,17 @@ jobs:
           auto-push: ${{ inputs.publish }}
           comment-on-alert: true
           max-items-in-chart: 20
+      - name: Check resource utilization
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: 'customSmallerIsBetter'
+          output-file-path: results/peak_mem_usage.json
+          benchmark-data-dir-path: dev/cache_bench/peak_mem_usage
+          alert-threshold: "200%"
+          fail-on-alert: false
+          # GitHub API token to make a commit comment
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          # Store the results and deploy GitHub pages automatically if the results are from main branch
+          auto-push: ${{ inputs.publish }}
+          comment-on-alert: true
+          max-items-in-chart: 20
diff --git a/.github/workflows/bench_s3express.yml b/.github/workflows/bench_s3express.yml
index 1a019aefa..dd83d6e6f 100644
--- a/.github/workflows/bench_s3express.yml
+++ b/.github/workflows/bench_s3express.yml
@@ -79,6 +79,20 @@ jobs:
           auto-push: ${{ inputs.publish }}
           comment-on-alert: true
           max-items-in-chart: 20
+      - name: Check resource utilization
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: 'customSmallerIsBetter'
+          output-file-path: results/peak_mem_usage.json
+          benchmark-data-dir-path: dev/s3-express/bench/peak_mem_usage
+          alert-threshold: "200%"
+          fail-on-alert: false
+          # GitHub API token to make a commit comment
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          # Store the results and deploy GitHub pages automatically if the results are from main branch
+          auto-push: ${{ inputs.publish }}
+          comment-on-alert: true
+          max-items-in-chart: 20

   latency-bench:
     name: Benchmark (Latency)
diff --git a/mountpoint-s3/Cargo.toml b/mountpoint-s3/Cargo.toml
index 6b3b7e942..7f8736eab 100644
--- a/mountpoint-s3/Cargo.toml
+++ b/mountpoint-s3/Cargo.toml
@@ -96,3 +96,7 @@ path = "src/main.rs"
 name = "mock-mount-s3"
 path = "src/bin/mock-mount-s3.rs"
 required-features = ["mountpoint-s3-client/mock"]
+
+[[bin]]
+name = "mount-s3-log-analyzer"
+path = "src/bin/mount-s3-log-analyzer.rs"
diff --git a/mountpoint-s3/scripts/fs_bench.sh b/mountpoint-s3/scripts/fs_bench.sh
index b347c6ea4..a5424cb96 100755
--- a/mountpoint-s3/scripts/fs_bench.sh
+++ b/mountpoint-s3/scripts/fs_bench.sh
@@ -162,6 +162,9 @@ read_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir

+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
   done

@@ -220,6 +223,9 @@ write_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir

+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
   done

@@ -229,4 +235,7 @@ read_benchmark
 write_benchmark

 # combine all bench results into one json file
+echo "Throughput:"
 jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
+echo "Peak memory usage:"
+jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
diff --git a/mountpoint-s3/scripts/fs_cache_bench.sh b/mountpoint-s3/scripts/fs_cache_bench.sh
index 398d4ba41..9e87dfe51 100755
--- a/mountpoint-s3/scripts/fs_cache_bench.sh
+++ b/mountpoint-s3/scripts/fs_cache_bench.sh
@@ -196,6 +196,9 @@ cache_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir

+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
   done
 }

@@ -203,4 +206,7 @@ cache_benchmark () {
 cache_benchmark

 # combine all bench results into one json file
+echo "Throughput:"
 jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
+echo "Peak memory usage:"
+jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
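Each bench script above writes one `<job_name>_peak_mem.json` object per fio job and then merges them with `jq -n '[inputs]'` into `results/peak_mem_usage.json`, which is the file the `customSmallerIsBetter` steps in the workflows read. A minimal sketch of the merged file, with job names and values that are purely illustrative:

    [
      { "name": "seq_read", "value": 581.375, "unit": "MiB" },
      { "name": "rand_read_4k", "value": 312.5, "unit": "MiB" }
    ]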
diff --git a/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
new file mode 100644
index 000000000..ff669f624
--- /dev/null
+++ b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
@@ -0,0 +1,79 @@
+//! A helper binary for parsing Mountpoint logs and collecting metrics.
+//! Currently, we are only interested in peak memory usage from `process.memory_usage`.
+//!
+//! This binary is intended only for use in testing and development of Mountpoint.
+
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader, BufWriter, Write},
+    path::PathBuf,
+};
+
+use anyhow::anyhow;
+use clap::Parser;
+use regex::Regex;
+use serde_json::json;
+
+#[derive(Parser, Debug)]
+struct CliArgs {
+    #[clap(help = "Log directory to analyze", value_name = "LOG_DIRECTORY")]
+    log_dir: PathBuf,
+
+    #[clap(help = "Output JSON file name", value_name = "OUTPUT_FILE")]
+    out_file: PathBuf,
+
+    #[clap(help = "Test name to be reported in JSON file")]
+    test_name: String,
+}
+
+fn main() -> anyhow::Result<()> {
+    const MEM_USAGE_LOG_PATTERN: &str = "process\\.memory_usage:\\s\\d+$";
+
+    let args = CliArgs::parse();
+    let paths = fs::read_dir(args.log_dir)?;
+    let log_pattern = Regex::new(MEM_USAGE_LOG_PATTERN)?;
+
+    let mut metric_values: Vec<u64> = Vec::new();
+
+    // collect metrics from all log files in the given directory
+    for path in paths {
+        let path = path?;
+        let file_type = path.file_type()?;
+        if file_type.is_file() {
+            let file = File::open(path.path())?;
+            let reader = BufReader::new(file);
+
+            for line in reader.lines() {
+                if line.is_err() {
+                    continue;
+                }
+                let line = line.unwrap();
+                if log_pattern.is_match(&line) {
+                    let iter = line.split_whitespace();
+                    if let Some(parsed_result) = iter.last().map(|last| last.parse::<u64>()) {
+                        let Ok(value) = parsed_result else {
+                            return Err(anyhow!("Unable to parse metric value: {}", parsed_result.unwrap_err()));
+                        };
+                        metric_values.push(value);
+                    }
+                }
+            }
+        }
+    }
+
+    let max = if let Some(value) = metric_values.iter().max() {
+        *value as f64 / (1024 * 1024) as f64
+    } else {
+        0.0
+    };
+    let contents = json!({
+        "name": args.test_name,
+        "value": max,
+        "unit": "MiB",
+    });
+    let file = File::create(args.out_file)?;
+    let mut writer = BufWriter::new(file);
+    serde_json::to_writer(&mut writer, &contents)?;
+    writer.flush()?;
+    Ok(())
+}
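The analyzer only keys off log lines whose metrics dump ends in `process.memory_usage: <bytes>`; the regex makes no assumption about the rest of the Mountpoint log line. A hypothetical invocation, with paths, test name, and the logged value chosen purely for illustration:

    # assuming a file in logs/ contains a line ending in "process.memory_usage: 609615872"
    cargo run --bin mount-s3-log-analyzer logs/ results/seq_read_peak_mem.json seq_read
    # results/seq_read_peak_mem.json then holds a single object, roughly:
    # {"name":"seq_read","value":581.375,"unit":"MiB"}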