From 86b62599957f906d55f870256f55df484ee68262 Mon Sep 17 00:00:00 2001
From: Monthon Klongklaew <monthonk@amazon.com>
Date: Mon, 23 Sep 2024 15:49:36 +0000
Subject: [PATCH 1/2] Check resource utilization

Signed-off-by: Monthon Klongklaew <monthonk@amazon.com>
---
 .github/workflows/bench.yml                   | 30 ++++++++
 .github/workflows/bench_s3express.yml         | 14 ++++
 mountpoint-s3/Cargo.toml                      |  4 +
 mountpoint-s3/scripts/fs_bench.sh             |  9 +++
 mountpoint-s3/scripts/fs_cache_bench.sh       |  6 ++
 .../src/bin/mount-s3-log-analyzer.rs          | 77 +++++++++++++++++++
 6 files changed, 140 insertions(+)
 create mode 100644 mountpoint-s3/src/bin/mount-s3-log-analyzer.rs

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 1b6e7cda0..c9d205851 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -70,6 +70,8 @@ jobs:
       with:
         tool: 'customBiggerIsBetter'
         output-file-path: results/output.json
+        # This is the default value; it is set here only to make it explicit
+        benchmark-data-dir-path: dev/bench
         alert-threshold: "200%"
         fail-on-alert: true
         # GitHub API token to make a commit comment
@@ -78,6 +80,20 @@ jobs:
         auto-push: ${{ inputs.publish }}
         comment-on-alert: true
         max-items-in-chart: 20
+    - name: Check resource utilization
+      uses: benchmark-action/github-action-benchmark@v1
+      with:
+        tool: 'customSmallerIsBetter'
+        output-file-path: results/peak_mem_usage.json
+        benchmark-data-dir-path: dev/bench/peak_mem_usage
+        alert-threshold: "200%"
+        fail-on-alert: false
+        # GitHub API token to make a commit comment
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        # Store the results and deploy GitHub pages automatically if the results are from main branch
+        auto-push: ${{ inputs.publish }}
+        comment-on-alert: true
+        max-items-in-chart: 20
 
   latency-bench:
     name: Benchmark (Latency)
@@ -188,3 +204,17 @@ jobs:
         auto-push: ${{ inputs.publish }}
         comment-on-alert: true
         max-items-in-chart: 20
+    - name: Check resource utilization
+      uses: benchmark-action/github-action-benchmark@v1
+      with:
+        tool: 'customSmallerIsBetter'
+        output-file-path: results/peak_mem_usage.json
+        benchmark-data-dir-path: dev/cache_bench/peak_mem_usage
+        alert-threshold: "200%"
+        fail-on-alert: false
+        # GitHub API token to make a commit comment
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        # Store the results and deploy GitHub pages automatically if the results are from main branch
+        auto-push: ${{ inputs.publish }}
+        comment-on-alert: true
+        max-items-in-chart: 20
diff --git a/.github/workflows/bench_s3express.yml b/.github/workflows/bench_s3express.yml
index 1a019aefa..dd83d6e6f 100644
--- a/.github/workflows/bench_s3express.yml
+++ b/.github/workflows/bench_s3express.yml
@@ -79,6 +79,20 @@ jobs:
         auto-push: ${{ inputs.publish }}
         comment-on-alert: true
         max-items-in-chart: 20
+    - name: Check resource utilization
+      uses: benchmark-action/github-action-benchmark@v1
+      with:
+        tool: 'customSmallerIsBetter'
+        output-file-path: results/peak_mem_usage.json
+        benchmark-data-dir-path: dev/s3-express/bench/peak_mem_usage
+        alert-threshold: "200%"
+        fail-on-alert: false
+        # GitHub API token to make a commit comment
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        # Store the results and deploy GitHub pages automatically if the results are from main branch
+        auto-push: ${{ inputs.publish }}
+        comment-on-alert: true
+        max-items-in-chart: 20
 
   latency-bench:
     name: Benchmark (Latency)
diff --git a/mountpoint-s3/Cargo.toml b/mountpoint-s3/Cargo.toml
index 6b3b7e942..7f8736eab 100644
--- a/mountpoint-s3/Cargo.toml
+++ b/mountpoint-s3/Cargo.toml
@@ -96,3 +96,7 @@ path = "src/main.rs"
 name = "mock-mount-s3"
 path = "src/bin/mock-mount-s3.rs"
 required-features = ["mountpoint-s3-client/mock"]
+
+[[bin]]
+name = "mount-s3-log-analyzer"
+path = "src/bin/mount-s3-log-analyzer.rs"
diff --git a/mountpoint-s3/scripts/fs_bench.sh b/mountpoint-s3/scripts/fs_bench.sh
index b347c6ea4..a5424cb96 100755
--- a/mountpoint-s3/scripts/fs_bench.sh
+++ b/mountpoint-s3/scripts/fs_bench.sh
@@ -162,6 +162,9 @@ read_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir
 
+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
 
   done
@@ -220,6 +223,9 @@ write_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir
 
+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
 
   done
@@ -229,4 +235,7 @@ read_benchmark
 write_benchmark
 
 # combine all bench results into one json file
+echo "Throughput:"
 jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
+echo "Peak memory usage:"
+jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
diff --git a/mountpoint-s3/scripts/fs_cache_bench.sh b/mountpoint-s3/scripts/fs_cache_bench.sh
index 398d4ba41..9e87dfe51 100755
--- a/mountpoint-s3/scripts/fs_cache_bench.sh
+++ b/mountpoint-s3/scripts/fs_cache_bench.sh
@@ -196,6 +196,9 @@ cache_benchmark () {
     # run the benchmark
     run_fio_job $job_file $bench_file $mount_dir $log_dir
 
+    # collect resource utilization metrics (peak memory usage)
+    cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
+
     cleanup
   done
 }
@@ -203,4 +206,7 @@ cache_benchmark () {
 cache_benchmark
 
 # combine all bench results into one json file
+echo "Throughput:"
 jq -n '[inputs]' ${results_dir}/*_parsed.json | tee ${results_dir}/output.json
+echo "Peak memory usage:"
+jq -n '[inputs]' ${results_dir}/*_peak_mem.json | tee ${results_dir}/peak_mem_usage.json
diff --git a/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
new file mode 100644
index 000000000..0ba2cac3f
--- /dev/null
+++ b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
@@ -0,0 +1,77 @@
+//! A helper binary for parsing Mountpoint logs and collecting metrics.
+//! Currently, we are only interested in peak memory usage from `process.memory_usage`.
+//!
+//! This binary is intended only for use in testing and development of Mountpoint.
+
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader, BufWriter, Write},
+    path::PathBuf,
+};
+
+use clap::Parser;
+use serde_json::json;
+
+#[derive(Parser, Debug)]
+struct CliArgs {
+    #[clap(help = "Log directory to analyze", value_name = "LOG_DIRECTORY")]
+    log_dir: PathBuf,
+
+    #[clap(help = "Output JSON file name", value_name = "OUTPUT_FILE")]
+    out_file: PathBuf,
+
+    #[clap(help = "Test name to be reported in JSON file")]
+    test_name: String,
+
+    #[clap(
+        help = "Log filter string [default: process.memory_usage]",
+        default_value = "process.memory_usage"
+    )]
+    log_filter_str: String,
+}
+
+fn main() -> anyhow::Result<()> {
+    let args = CliArgs::parse();
+    let paths = fs::read_dir(args.log_dir)?;
+
+    let mut metric_values: Vec<u64> = Vec::new();
+
+    // collect metrics from all log files in the given directory
+    for path in paths {
+        let path = path?;
+        let file_type = path.file_type()?;
+        if file_type.is_file() {
+            let file = File::open(path.path())?;
+            let reader = BufReader::new(file);
+
+            for line in reader.lines() {
+                if line.is_err() {
+                    continue;
+                }
+                let line = line.unwrap();
+                if line.contains(&args.log_filter_str) {
+                    let iter = line.split_whitespace();
+                    if let Some(Ok(value)) = iter.last().map(|last| last.parse::<u64>()) {
+                        metric_values.push(value);
+                    }
+                }
+            }
+        }
+    }
+
+    let max = if let Some(value) = metric_values.iter().max() {
+        *value as f64 / (1024 * 1024) as f64
+    } else {
+        0.0
+    };
+    let contents = json!({
+        "name": args.test_name,
+        "value": max,
+        "unit": "MiB",
+    });
+    let file = File::create(args.out_file)?;
+    let mut writer = BufWriter::new(file);
+    serde_json::to_writer(&mut writer, &contents)?;
+    writer.flush()?;
+    Ok(())
+}

From b3746db29e3e8282e4617fae3e3570d8ce51fc74 Mon Sep 17 00:00:00 2001
From: Monthon Klongklaew <monthonk@amazon.com>
Date: Mon, 23 Sep 2024 21:03:01 +0000
Subject: [PATCH 2/2] Address PR comments in mount-s3-log-analyzer

Signed-off-by: Monthon Klongklaew <monthonk@amazon.com>
---
 mountpoint-s3/src/bin/mount-s3-log-analyzer.rs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
index 0ba2cac3f..ff669f624 100644
--- a/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
+++ b/mountpoint-s3/src/bin/mount-s3-log-analyzer.rs
@@ -9,7 +9,9 @@ use std::{
     path::PathBuf,
 };
 
+use anyhow::anyhow;
 use clap::Parser;
+use regex::Regex;
 use serde_json::json;
 
 #[derive(Parser, Debug)]
@@ -22,17 +24,14 @@ struct CliArgs {
 
     #[clap(help = "Test name to be reported in JSON file")]
     test_name: String,
-
-    #[clap(
-        help = "Log filter string [default: process.memory_usage]",
-        default_value = "process.memory_usage"
-    )]
-    log_filter_str: String,
 }
 
 fn main() -> anyhow::Result<()> {
+    const MEM_USAGE_LOG_PATTERN: &str = "process\\.memory_usage:\\s\\d+$";
+
     let args = CliArgs::parse();
     let paths = fs::read_dir(args.log_dir)?;
+    let log_pattern = Regex::new(MEM_USAGE_LOG_PATTERN)?;
 
     let mut metric_values: Vec<u64> = Vec::new();
 
@@ -49,9 +48,12 @@ fn main() -> anyhow::Result<()> {
                     continue;
                 }
                 let line = line.unwrap();
-                if line.contains(&args.log_filter_str) {
+                if log_pattern.is_match(&line) {
                     let iter = line.split_whitespace();
-                    if let Some(Ok(value)) = iter.last().map(|last| last.parse::<u64>()) {
+                    if let Some(parsed_result) = iter.last().map(|last| last.parse::<u64>()) {
+                        let Ok(value) = parsed_result else {
+                            return Err(anyhow!("Unable to parse metric value: {}", parsed_result.unwrap_err()));
+                        };
                         metric_values.push(value);
                     }
                 }