From b9dfbce618ec356e11e3f9e2d3fbe70f2ceadf93 Mon Sep 17 00:00:00 2001
From: Saveliy Yusufov <sy2685@columbia.edu>
Date: Thu, 8 Feb 2024 14:02:02 -0500
Subject: [PATCH] Automate finding the latest benchmark results

---
 benches/benchmark.sh       |  0
 benches/benchmark_plots.py |  9 ++++++---
 benches/py_benchmarks.py   |  9 +++++++--
 benches/utils.py           | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 benches/benchmark.sh

diff --git a/benches/benchmark.sh b/benches/benchmark.sh
old mode 100644
new mode 100755
diff --git a/benches/benchmark_plots.py b/benches/benchmark_plots.py
index 4876467..15ce515 100644
--- a/benches/benchmark_plots.py
+++ b/benches/benchmark_plots.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 
-from utils import bytes2human
+from utils import bytes2human, find_directory
 
 plt.style.use("fivethirtyeight")
 
@@ -73,11 +73,14 @@ def plot(data: dict[str, list], n_range: range) -> None:
 def main():
     lib_names = ("rustfft", "phastft", "fftw3")
     n_range = range(12, 30)
-
     all_data = {}
 
     for lib in lib_names:
-        data = build_and_clean_data("benchmark-data.2024.02.02.19-10-51", n_range, lib)
+        root_folder = find_directory()
+        if root_folder is None:
+            raise FileNotFoundError("unable to find the benchmark data directory")
+
+        data = build_and_clean_data(root_folder, n_range, lib)
         all_data[lib] = data
 
     assert (
diff --git a/benches/py_benchmarks.py b/benches/py_benchmarks.py
index dd4e52b..8ebb5b1 100644
--- a/benches/py_benchmarks.py
+++ b/benches/py_benchmarks.py
@@ -89,7 +89,12 @@ def main() -> None:
 
 
 def read_csv_to_dict(file_path: str) -> dict:
-    data = {"n": [], "phastft_time": [], "numpy_fft_time": [], "pyfftw_fft_time": []}
+    data: dict[str, list] = {
+        "n": [],
+        "phastft_time": [],
+        "numpy_fft_time": [],
+        "pyfftw_fft_time": [],
+    }
     with open(file_path, newline="") as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
@@ -113,7 +118,7 @@ def plot_elapsed_times(data: dict) -> None:
     phastft_timings = np.asarray(data["phastft_time"])
 
     plt.plot(index, np_fft_timings, label="NumPy FFT", lw=0.8)
-    plt.plot(index, pyfftw_timings,  label="PyFFTW FFT", lw=0.8)
+    plt.plot(index, pyfftw_timings, label="PyFFTW FFT", lw=0.8)
     plt.plot(index, phastft_timings, label="PhastFT", lw=0.98)
 
     plt.title("FFT Elapsed Times Comparison")
diff --git a/benches/utils.py b/benches/utils.py
index c5ad355..ea4e0ce 100644
--- a/benches/utils.py
+++ b/benches/utils.py
@@ -2,6 +2,11 @@
 Utility functions for plotting that are common to all scripts
 """
 
+import os
+import re
+from datetime import datetime
+
+
 SYMBOLS = {
     "customary": ("B", "K", "M", "G", "T", "P", "E", "Z", "Y"),
     "customary_ext": (
@@ -42,3 +47,31 @@ def bytes2human(n, format="%(value).0f %(symbol)s", symbols="customary"):
     return format % dict(symbol=symbols[0], value=n)
 
 
+def find_directory(pattern="benchmark-data"):
+    """Return the newest timestamped benchmark directory in the CWD.
+
+    A directory qualifies when its name contains ``pattern`` and embeds a
+    ``YYYY.MM.DD.HH-MM-SS`` timestamp. Returns the absolute path of the one
+    with the latest valid timestamp, or ``None`` when nothing qualifies.
+    """
+    current_dir = os.getcwd()
+    date_pattern = re.compile(r"\d{4}\.\d{2}\.\d{2}\.\d{2}-\d{2}-\d{2}")
+
+    candidates = []
+    for name in os.listdir(current_dir):
+        match = date_pattern.search(name)
+        if pattern not in name or match is None:
+            continue
+        if not os.path.isdir(os.path.join(current_dir, name)):
+            continue
+        try:
+            stamp = datetime.strptime(match.group(), "%Y.%m.%d.%H-%M-%S")
+        except ValueError:
+            # Digits matched the regex but are not a real date; skip it
+            # instead of aborting the whole search.
+            continue
+        candidates.append((stamp, name))
+
+    if not candidates:
+        return None
+    return os.path.join(current_dir, max(candidates)[1])