Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add memory usage check to the CI #1028

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ jobs:
with:
tool: 'customBiggerIsBetter'
output-file-path: results/output.json
# This is the default value; set it explicitly for clarity
benchmark-data-dir-path: dev/bench
alert-threshold: "200%"
fail-on-alert: true
# GitHub API token to make a commit comment
Expand All @@ -78,6 +80,20 @@ jobs:
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20
- name: Check resource utilization
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'customSmallerIsBetter'
output-file-path: results/peak_mem_usage.json
benchmark-data-dir-path: dev/bench/peak_mem_usage
alert-threshold: "200%"
fail-on-alert: false
# GitHub API token to make a commit comment
github-token: ${{ secrets.GITHUB_TOKEN }}
# Store the results and deploy GitHub pages automatically if the results are from main branch
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20

latency-bench:
name: Benchmark (Latency)
Expand Down Expand Up @@ -188,3 +204,17 @@ jobs:
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20
- name: Check resource utilization
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'customSmallerIsBetter'
output-file-path: results/peak_mem_usage.json
benchmark-data-dir-path: dev/cache_bench/peak_mem_usage
alert-threshold: "200%"
fail-on-alert: false
# GitHub API token to make a commit comment
github-token: ${{ secrets.GITHUB_TOKEN }}
# Store the results and deploy GitHub pages automatically if the results are from main branch
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20
14 changes: 14 additions & 0 deletions .github/workflows/bench_s3express.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ jobs:
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20
- name: Check resource utilization
vladem marked this conversation as resolved.
Show resolved Hide resolved
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'customSmallerIsBetter'
output-file-path: results/peak_mem_usage.json
benchmark-data-dir-path: dev/s3-express/bench/peak_mem_usage
alert-threshold: "200%"
fail-on-alert: false
# GitHub API token to make a commit comment
github-token: ${{ secrets.GITHUB_TOKEN }}
# Store the results and deploy GitHub pages automatically if the results are from main branch
auto-push: ${{ inputs.publish }}
comment-on-alert: true
max-items-in-chart: 20

latency-bench:
name: Benchmark (Latency)
Expand Down
4 changes: 4 additions & 0 deletions mountpoint-s3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,7 @@ path = "src/main.rs"
name = "mock-mount-s3"
path = "src/bin/mock-mount-s3.rs"
required-features = ["mountpoint-s3-client/mock"]

[[bin]]
vladem marked this conversation as resolved.
Show resolved Hide resolved
name = "mount-s3-log-analyzer"
path = "src/bin/mount-s3-log-analyzer.rs"
9 changes: 9 additions & 0 deletions mountpoint-s3/scripts/fs_bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ read_benchmark () {
# run the benchmark
run_fio_job $job_file $bench_file $mount_dir $log_dir

# collect resource utilization metrics (peak memory usage)
cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
vladem marked this conversation as resolved.
Show resolved Hide resolved

cleanup

done
Expand Down Expand Up @@ -220,6 +223,9 @@ write_benchmark () {
# run the benchmark
run_fio_job $job_file $bench_file $mount_dir $log_dir

# collect resource utilization metrics (peak memory usage)
cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}

cleanup

done
Expand All @@ -229,4 +235,7 @@ read_benchmark
write_benchmark

# combine all bench results into one json file
echo "Throughput:"
jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
echo "Peak memory usage:"
jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
6 changes: 6 additions & 0 deletions mountpoint-s3/scripts/fs_cache_bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,17 @@ cache_benchmark () {
# run the benchmark
run_fio_job $job_file $bench_file $mount_dir $log_dir

# collect resource utilization metrics (peak memory usage)
cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}

cleanup
done
}

cache_benchmark

# combine all bench results into one json file
echo "Throughput:"
jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
echo "Peak memory usage:"
jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
79 changes: 79 additions & 0 deletions mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//! A helper binary for parsing Mountpoint logs and collecting metrics.
//! Currently, we are only interested in peak memory usage from `process.memory_usage`.
//!
//! This binary is intended only for use in testing and development of Mountpoint.

use std::{
fs::{self, File},
io::{BufRead, BufReader, BufWriter, Write},
path::PathBuf,
};

use anyhow::anyhow;
use clap::Parser;
use regex::Regex;
use serde_json::json;

// Command-line arguments for the log analyzer.
// Scans LOG_DIRECTORY for `process.memory_usage` metric lines and writes a
// single JSON record with the peak value to OUTPUT_FILE.
// NOTE(review): plain `//` comments are used deliberately — `///` doc comments
// on clap-derived fields are picked up as help text and would change CLI output.
#[derive(Parser, Debug)]
struct CliArgs {
    // Positional: directory containing the Mountpoint log files to scan.
    #[clap(help = "Log directory to analyze", value_name = "LOG_DIRECTORY")]
    log_dir: PathBuf,

    // Positional: destination path for the JSON result.
    #[clap(help = "Output JSON file name", value_name = "OUTPUT_FILE")]
    out_file: PathBuf,

    // Positional: benchmark name recorded in the "name" field of the output JSON.
    #[clap(help = "Test name to be reported in JSON file")]
    test_name: String,
}

/// Scan every log file in `log_dir` for `process.memory_usage` samples and
/// write the peak value (converted to MiB) as a benchmark-action JSON record.
///
/// Errors are returned for I/O failures, an invalid regex, or a metric line
/// whose trailing token is not a valid `u64`. Unreadable (e.g. non-UTF-8)
/// log lines are skipped rather than aborting the scan.
fn main() -> anyhow::Result<()> {
    // Matches the tail of a metrics line such as "... process.memory_usage: 123456".
    const MEM_USAGE_LOG_PATTERN: &str = "process\\.memory_usage:\\s\\d+$";

    let args = CliArgs::parse();
    let paths = fs::read_dir(args.log_dir)?;
    let log_pattern = Regex::new(MEM_USAGE_LOG_PATTERN)?;

    let mut metric_values: Vec<u64> = Vec::new();

    // Collect metric samples from every regular file in the given directory.
    for entry in paths {
        let entry = entry?;
        if !entry.file_type()?.is_file() {
            continue;
        }
        let reader = BufReader::new(File::open(entry.path())?);

        for line in reader.lines() {
            // Skip lines that fail to read; keep scanning the rest of the file.
            let Ok(line) = line else {
                continue;
            };
            if log_pattern.is_match(&line) {
                // The metric value is the last whitespace-separated token;
                // the regex guarantees the line ends in digits.
                if let Some(last) = line.split_whitespace().last() {
                    let value = last
                        .parse::<u64>()
                        .map_err(|e| anyhow!("Unable to parse metric value: {}", e))?;
                    metric_values.push(value);
                }
            }
        }
    }

    // Peak usage converted from bytes to MiB; 0.0 when no samples were found
    // (the benchmark still gets a data point rather than a missing file).
    let max = metric_values
        .iter()
        .max()
        .map_or(0.0, |&v| v as f64 / (1024 * 1024) as f64);

    let contents = json!({
        "name": args.test_name,
        "value": max,
        "unit": "MiB",
    });
    let file = File::create(args.out_file)?;
    let mut writer = BufWriter::new(file);
    serde_json::to_writer(&mut writer, &contents)?;
    // Flush explicitly: BufWriter's Drop swallows write errors.
    writer.flush()?;
    Ok(())
}
Loading