From 3bb871a3c59c0b35239de0f47d4734ac70674f48 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Fri, 9 Jan 2026 14:46:51 -0800 Subject: [PATCH 1/5] add helper scripts to process data --- scripts/run_kernels.sh | 25 ++ scripts/study_run_kernels.py | 483 +++++++++++++++++++++++++++++++++++ 2 files changed, 508 insertions(+) create mode 100644 scripts/run_kernels.sh create mode 100644 scripts/study_run_kernels.py diff --git a/scripts/run_kernels.sh b/scripts/run_kernels.sh new file mode 100644 index 000000000..df3e1bbaf --- /dev/null +++ b/scripts/run_kernels.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +#Clean directory +rm -rf *.csv *.txt + +# Collection of float factors between 0.5 and 10 +FACTORS=(0.5 1.0 2.5 5.0 7.5 10.0) + +# List of kernels to run +KERNELS=("MASS3DPA" "DEL_DOT_VEC_2D") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + + for factor in "${FACTORS[@]}"; do + echo " Running with sizefact = $factor" + ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 3 \ + --npasses-combiners Average Minimum Maximum \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --sizefact "$factor" \ + --warmup-perfrun-same + done +done diff --git a/scripts/study_run_kernels.py b/scripts/study_run_kernels.py new file mode 100644 index 000000000..bf972aafd --- /dev/null +++ b/scripts/study_run_kernels.py @@ -0,0 +1,483 @@ +import os +import glob +import numpy as np +import pandas as pd + +# ============= Configuration ============= + +ROOT_DIR = "/usr/WS1/vargas45/RAJAPERF_DEV/RAJAPerf-stage/build_lc_toss4-amdclang-7.1.0-gfx942" # change if needed + +# Use "factor" instead of "mref" in file patterns +GLOB_PATTERNS = [ + "**/*factor*kernel-run-data.csv", # broad match +] + +# Optional filter to only keep specific kernels by substring match (case-insensitive) +# Leave empty to include all kernels discovered. +KERNEL_WHITELIST = [ + # "MASS3DPA", +] + +# Derivative reporting configuration +DERIV_USE_RELATIVE = True +DERIV_EPS_REL = 0.03 # relative threshold on |dy/dx| normalized by (y_range/x_range) +DERIV_EPS_ABS = 1e-4 # absolute threshold on |dy/dx|, only used if DERIV_USE_RELATIVE=False +DERIV_MIN_CONSEC = 3 # minimum consecutive points below threshold to consider a plateau run +DERIV_SMOOTH_WINDOW = 3 # moving average window for smoothing y before derivative +DERIV_MIN_FRAC_OF_MAX_Y = 0.9 # only search after reaching this fraction of max(y) +DERIV_REPORT_MAX_POINTS = 8 # limit how many points to print per series +DERIV_REPORT_ABS = True # print |dy/dx| if True, else print signed dy/dx + +# Output and plotting configuration +OUTPUT_DIR = "./results" +FIG_DIR = os.path.join(OUTPUT_DIR, "figures") +COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") +FIG_DPI = 300 +SHOW_PLOTS = True # show interactive windows while also saving PNGs + +# Use a non-interactive backend only when not showing plots +if not SHOW_PLOTS: + import matplotlib + matplotlib.use("Agg") + +import matplotlib.pyplot as plt + + +# ============= Helper functions ============= + +def ensure_dir(path): + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text): + return "".join(c if c.isalnum() or c in "-_." 
else "_" for c in str(text)) + +def find_csv_files(root_dir, patterns): + """Recursively find CSV files matching any of the given patterns.""" + all_files = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) # Remove duplicates and sort + return all_files + +def _likely_header_score(line): + """ + Score a potential header line based on presence of common column tokens. + Higher is more likely to be the header. + """ + tokens = [ + "Kernel", + "Variant", + "Problem size", + "Problem Size", + "Mean flops", + "GFlop", + "GFLOP", + "GFLOPs", + "GFLOPS", + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path): + """ + Read one CSV, trying to detect the header row by locating a line + that contains key column names. Returns a DataFrame or None. + """ + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print(f"Failed to read {path}: {e}") + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): # only inspect the first 50 lines + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + + if header_idx is None: + print(f"Could not find header in {path}, skipping.") + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print(f"Failed to parse CSV {path}: {e}") + return None + + df["__source_file__"] = path + return df + +def normalize_columns(df): + """ + Normalize common column names to a standard set if possible. + """ + candidates = { + # Standard name : possible variants + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + "Problem size": [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + "Mean flops (gigaFLOP per sec.)": [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + } + + new_col_map = {} + # strip whitespace from existing columns first + df = df.rename(columns={c: c.strip() for c in df.columns}) + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break # first match wins + + df = df.rename(columns=new_col_map) + return df + +def _moving_average(y, window): + if window is None or window <= 1 or len(y) < 3: + return y + window = max(2, int(window)) + kernel = np.ones(window, dtype=float) / float(window) + return np.convolve(y, kernel, mode="same") + +def _find_first_run(mask, min_len): + """Return the start index and run length of the first run of True with length >= min_len.""" + run = 0 + for i, v in enumerate(mask): + if v: + run += 1 + if run >= min_len: + start = i - run + 1 + j = i + 1 + while j < len(mask) and mask[j]: + j += 1 + return start, j - start + else: + run = 0 + return None, 0 + +def classify_backend_from_variant(variant): + """ + Heuristic classification of backend based on the Variant string. + Captures common cases even if names do not end with specific suffixes. 
+ """ + s = str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +def report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, +): + """ + Prints lines "Problem size=, dy/dx=" for points with small enough derivative. + Uses either a relative threshold or an absolute slope threshold. + Focuses on the near-peak region to avoid early flat areas. + """ + x = np.asarray(x, dtype=float) + y = np.asarray(y, dtype=float) + + # Aggregate duplicate x values by averaging y + if len(x) != len(np.unique(x)): + tmp = pd.DataFrame({"x": x, "y": y}).groupby("x", as_index=False)["y"].mean() + x = tmp["x"].values + y = tmp["y"].values + + # Sort by x + order = np.argsort(x) + x = x[order] + y = y[order] + + if len(x) < max(3, min_consecutive): + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}: not enough points for derivative analysis" + ) + return + + # Optional smoothing + y_sm = _moving_average(y, smooth_window) + + x_range = float(x.max() - x.min()) + if x_range == 0.0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}: zero x-range, cannot compute derivative" + ) + return + + deriv = np.gradient(y_sm, x) # dy/dx, same length as x + + # Restrict to near-peak region if requested + search_mask = np.ones_like(deriv, dtype=bool) + y_range = float(y_sm.max() - y_sm.min()) + if min_frac_of_max_y is not None and 0.0 < min_frac_of_max_y < 1.0 and y_range > 0: + thresh_y = y_sm.max() * float(min_frac_of_max_y) + search_mask = y_sm >= thresh_y + + if use_relative: + # Normalize slope by typical scale y_range/x_range for a dimensionless measure + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / norm_factor + near_zero_mask = (deriv_norm <= float(eps_rel)) & search_mask + else: + near_zero_mask = (np.abs(deriv) <= float(eps_abs)) & search_mask + + # Prefer the first sustained run of small derivatives + start_idx, run_len = _find_first_run(near_zero_mask, int(min_consecutive)) + + if start_idx is not None: + run_indices = np.arange(start_idx, start_idx + run_len) + # Downsample to at most max_points for readability + if len(run_indices) > max_points: + picks_rel = np.linspace(0, len(run_indices) - 1, num=max_points) + pick = run_indices[np.round(picks_rel).astype(int)] + else: + pick = run_indices + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}: sustained near-zero derivative region found, points={run_len}" + ) + else: + # Fallback: choose up to max_points with smallest slope in the search region + candidates = np.where(search_mask)[0] + if candidates.size == 0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}: no valid search region for derivative analysis" + ) + return + + if use_relative: + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / (norm_factor if norm_factor > 0 else 1.0) + order_c = np.argsort(deriv_norm[candidates]) + else: + 
order_c = np.argsort(np.abs(deriv[candidates])) + pick = candidates[order_c[:max_points]] + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}: no sustained plateau, showing {len(pick)} smallest-slope points" + ) + + # Ensure sorted by x before printing + pick = np.array(sorted(pick.tolist())) + + # Print lines in the requested format + for idx in pick: + dy = deriv[idx] + dy_out = abs(dy) if report_abs else dy + print(f" Problem size={x[idx]:.6g}, dy/dx={dy_out:.6g}") + +def plot_backend(df_backend, backend_label, fig_dir, show_plots, fig_dpi): + """ + For a given backend: + - One figure per Kernel with solid line and markers per Variant. + - Prints derivative-based small-slope points per Variant. + - Saves each figure as PNG. + - Shows figures interactively if requested. + """ + if df_backend.empty: + print(f"\nNo data for backend {backend_label}.") + return + + kernels = sorted(df_backend["Kernel"].dropna().unique()) + variants = sorted(df_backend["Variant"].dropna().unique()) + + # Use a larger palette in case many variants exist + cmap = plt.cm.tab20 + colors = [cmap(i % cmap.N) for i in range(max(1, len(variants)))] + color_map = {v: colors[i % len(colors)] for i, v in enumerate(variants)} + + for kernel in kernels: + df_k = df_backend[df_backend["Kernel"] == kernel] + if df_k.empty: + continue + + fig = plt.figure(figsize=(10, 6)) + + for variant, g in df_k.groupby("Variant"): + g_sorted = g.sort_values("Problem size") + x = g_sorted["Problem size"].values + y = g_sorted["Mean flops (gigaFLOP per sec.)"].values + + color = color_map.get(variant, "black") + + # Actual data curve: solid line with markers + plt.plot( + x, + y, + marker="o", + linestyle="-", + color=color, + label=f"{variant}", + ) + + # Derivative-based report of small-slope points + report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, + ) + + plt.xlabel("Problem size") + plt.ylabel("Mean flops (gigaFLOP per sec.)") + plt.title(f"{backend_label} backend, Kernel: {kernel}") + plt.grid(True) + plt.tight_layout() + plt.legend(fontsize="small", bbox_to_anchor=(1.05, 1), loc="upper left") + + # Save figure as PNG + kernel_safe = sanitize_filename(kernel) + fname = f"{backend_label}_Kernel-{kernel_safe}.png" + fig_path = os.path.join(fig_dir, fname) + plt.savefig(fig_path, dpi=fig_dpi, bbox_inches="tight") + print(f"[SAVE] Figure saved to: {fig_path}") + + if show_plots: + plt.show() + else: + plt.close(fig) + +# ============= Main logic ============= + +def main(): + ensure_dir(OUTPUT_DIR) + ensure_dir(FIG_DIR) + + files = find_csv_files(ROOT_DIR, GLOB_PATTERNS) + if not files: + print(f"No files matching patterns {GLOB_PATTERNS} found under '{ROOT_DIR}'") + return + + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + + # Verify required columns exist post-normalization, else report and skip + required_any = {"Kernel", "Variant", "Problem size", "Mean flops (gigaFLOP per sec.)"} + if not required_any.issubset(set(df.columns)): + print(f"[SKIP] {path} missing required columns after normalization.") + print(" Columns present:", list(df.columns)) + continue + + 
dfs.append(df) + + if not dfs: + print("No CSV files could be parsed with required columns.") + return + + combined_df = pd.concat(dfs, ignore_index=True) + + # Basic cleaning + combined_df["Kernel"] = combined_df["Kernel"].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + # Optional kernel filter + if KERNEL_WHITELIST: + wl = [w.lower() for w in KERNEL_WHITELIST] + combined_df = combined_df[ + combined_df["Kernel"].str.lower().apply(lambda k: any(w in k for w in wl)) + ] + if combined_df.empty: + print("After applying KERNEL_WHITELIST, no rows remain.") + return + + # Convert numeric columns + combined_df["Problem size"] = pd.to_numeric(combined_df["Problem size"], errors="coerce") + combined_df["Mean flops (gigaFLOP per sec.)"] = pd.to_numeric( + combined_df["Mean flops (gigaFLOP per sec.)"], errors="coerce" + ) + + # Drop rows without x or y + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=["Problem size", "Mean flops (gigaFLOP per sec.)"]) + dropped = before_drop - len(combined_df) + if dropped > 0: + print(f"[CLEAN] Dropped {dropped} rows with non-numeric Problem size or Mean flops.") + + # Save concatenated table to CSV + ensure_dir(os.path.dirname(COMBINED_CSV_PATH)) + combined_df.to_csv(COMBINED_CSV_PATH, index=False) + print(f"[SAVE] Combined table saved to: {COMBINED_CSV_PATH}") + + # Backend classification + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + + # Quick summary to help verify MASS3DPA is present + print("\nKernels discovered:") + print(sorted(combined_df["Kernel"].unique())) + + print("\nCounts by Kernel and Backend:") + summary = ( + combined_df.groupby(["Kernel", "Backend"]) + .size() + .reset_index(name="rows") + .sort_values(["Kernel", "Backend"]) + ) + for _, row in summary.iterrows(): + print(f" Kernel={row['Kernel']}, Backend={row['Backend']}: rows={row['rows']}") + + # Plot aggregated "All" view so kernels appear even if backend classification is Unknown + print("\n[Plot] Generating 'All' plots per kernel...") + plot_backend(combined_df, "All", FIG_DIR, SHOW_PLOTS, FIG_DPI) + + # Plot per requested backends, only if data exists + for b in ["CUDA", "HIP", "Seq", "OpenMP"]: + df_b = combined_df[combined_df["Backend"] == b] + if df_b.empty: + print(f"[Plot] Skipping backend {b}, no rows.") + continue + print(f"\n[Plot] Generating '{b}' plots per kernel...") + plot_backend(df_b, b, FIG_DIR, SHOW_PLOTS, FIG_DPI) + +if __name__ == "__main__": + main() From 77b86f2f4d4c699e4df0e674eb8fad0402f4992c Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Fri, 9 Jan 2026 14:58:06 -0800 Subject: [PATCH 2/5] minor --- scripts/study_run_kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/study_run_kernels.py b/scripts/study_run_kernels.py index bf972aafd..15972c4b5 100644 --- a/scripts/study_run_kernels.py +++ b/scripts/study_run_kernels.py @@ -5,7 +5,7 @@ # ============= Configuration ============= -ROOT_DIR = "/usr/WS1/vargas45/RAJAPERF_DEV/RAJAPerf-stage/build_lc_toss4-amdclang-7.1.0-gfx942" # change if needed +ROOT_DIR = "." 
# change if needed # Use "factor" instead of "mref" in file patterns GLOB_PATTERNS = [ From dcaa3aff1d2592519992819d99d6ffe8a531a2e1 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 13 Jan 2026 11:51:49 -0800 Subject: [PATCH 3/5] add new script that adds tunings --- scripts/study_run_kernel_tunings.py | 532 ++++++++++++++++++++++++++++ 1 file changed, 532 insertions(+) create mode 100644 scripts/study_run_kernel_tunings.py diff --git a/scripts/study_run_kernel_tunings.py b/scripts/study_run_kernel_tunings.py new file mode 100644 index 000000000..a978c7382 --- /dev/null +++ b/scripts/study_run_kernel_tunings.py @@ -0,0 +1,532 @@ +import os +import glob +import numpy as np +import pandas as pd + +# ============= Configuration ============= + +ROOT_DIR = "." # change if needed + +# Use "factor" instead of "mref" in file patterns +GLOB_PATTERNS = [ + "**/*factor*kernel-run-data.csv", # broad match +] + +# Optional filter to only keep specific kernels by substring match (case-insensitive) +# Leave empty to include all kernels discovered. +KERNEL_WHITELIST = [ + # "MASS3DPA", +] + +# Derivative reporting configuration +DERIV_USE_RELATIVE = True +DERIV_EPS_REL = 0.03 # relative threshold on |dy/dx| normalized by (y_range/x_range) +DERIV_EPS_ABS = 1e-4 # absolute threshold on |dy/dx|, only used if DERIV_USE_RELATIVE=False +DERIV_MIN_CONSEC = 3 # minimum consecutive points below threshold to consider a plateau run +DERIV_SMOOTH_WINDOW = 3 # moving average window for smoothing y before derivative +DERIV_MIN_FRAC_OF_MAX_Y = 0.9 # only search after reaching this fraction of max(y) +DERIV_REPORT_MAX_POINTS = 8 # limit how many points to print per series +DERIV_REPORT_ABS = True # print |dy/dx| if True, else print signed dy/dx + +# Output and plotting configuration +OUTPUT_DIR = "./results" +FIG_DIR = os.path.join(OUTPUT_DIR, "figures") +COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") +FIG_DPI = 300 +SHOW_PLOTS = True # show interactive windows while also saving PNGs + +# Use a non-interactive backend only when not showing plots +if not SHOW_PLOTS: + import matplotlib + matplotlib.use("Agg") + +import matplotlib.pyplot as plt + + +# ============= Helper functions ============= + +def ensure_dir(path): + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text): + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir, patterns): + """Recursively find CSV files matching any of the given patterns.""" + all_files = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) # Remove duplicates and sort + return all_files + +def _likely_header_score(line): + """ + Score a potential header line based on presence of common column tokens. + Higher is more likely to be the header. + """ + tokens = [ + "Kernel", + "Variant", + "Problem size", + "Problem Size", + "Mean flops", + "GFlop", + "GFLOP", + "GFLOPs", + "GFLOPS", + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path): + """ + Read one CSV, trying to detect the header row by locating a line + that contains key column names. Returns a DataFrame or None. 
+ """ + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print(f"Failed to read {path}: {e}") + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): # only inspect the first 50 lines + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + + if header_idx is None: + print(f"Could not find header in {path}, skipping.") + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print(f"Failed to parse CSV {path}: {e}") + return None + + df["__source_file__"] = path + return df + +def normalize_columns(df): + """ + Normalize common column names to a standard set if possible. + """ + candidates = { + # Standard name : possible variants + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + "Problem size": [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + "Mean flops (gigaFLOP per sec.)": [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + } + + new_col_map = {} + # strip whitespace from existing columns first + df = df.rename(columns={c: c.strip() for c in df.columns}) + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break # first match wins + + df = df.rename(columns=new_col_map) + return df + +def _moving_average(y, window): + if window is None or window <= 1 or len(y) < 3: + return y + window = max(2, int(window)) + kernel = np.ones(window, dtype=float) / float(window) + return np.convolve(y, kernel, mode="same") + +def _find_first_run(mask, min_len): + """Return the start index and run length of the first run of True with length >= min_len.""" + run = 0 + for i, v in enumerate(mask): + if v: + run += 1 + if run >= min_len: + start = i - run + 1 + j = i + 1 + while j < len(mask) and mask[j]: + j += 1 + return start, j - start + else: + run = 0 + return None, 0 + +def classify_backend_from_variant(variant): + """ + Heuristic classification of backend based on the Variant string. + Captures common cases even if names do not end with specific suffixes. + """ + s = str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +# NEW: classify tuning; adjust logic to match your actual naming scheme +def classify_tuning(row): + """ + Return a tuning label for a row. + You can customize this to use any available columns. 
+ Examples: + - A dedicated 'Tuning' column + - Parsing 'Variant' into backend + tuning + - Using problem-size or factor strings + For now, use: + - If a 'Tuning' column exists, use that + - Else, if '__source_file__' used different settings, use its basename + - Else, return 'default' + """ + # If the CSV already has a Tuning column, use it + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + + # Otherwise, derive from source file name as a proxy for tuning + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + + return "default" + +def report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning_label, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, +): + """ + Prints lines "Problem size=, dy/dx=" for points with small enough derivative. + Uses either a relative threshold or an absolute slope threshold. + Focuses on the near-peak region to avoid early flat areas. + """ + x = np.asarray(x, dtype=float) + y = np.asarray(y, dtype=float) + + # Aggregate duplicate x values by averaging y + if len(x) != len(np.unique(x)): + tmp = pd.DataFrame({"x": x, "y": y}).groupby("x", as_index=False)["y"].mean() + x = tmp["x"].values + y = tmp["y"].values + + # Sort by x + order = np.argsort(x) + x = x[order] + y = y[order] + + if len(x) < max(3, min_consecutive): + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: not enough points for derivative analysis" + ) + return + + # Optional smoothing + y_sm = _moving_average(y, smooth_window) + + x_range = float(x.max() - x.min()) + if x_range == 0.0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: zero x-range, cannot compute derivative" + ) + return + + deriv = np.gradient(y_sm, x) # dy/dx, same length as x + + # Restrict to near-peak region if requested + search_mask = np.ones_like(deriv, dtype=bool) + y_range = float(y_sm.max() - y_sm.min()) + if min_frac_of_max_y is not None and 0.0 < min_frac_of_max_y < 1.0 and y_range > 0: + thresh_y = y_sm.max() * float(min_frac_of_max_y) + search_mask = y_sm >= thresh_y + + if use_relative: + # Normalize slope by typical scale y_range/x_range for a dimensionless measure + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / norm_factor + near_zero_mask = (deriv_norm <= float(eps_rel)) & search_mask + else: + near_zero_mask = (np.abs(deriv) <= float(eps_abs)) & search_mask + + # Prefer the first sustained run of small derivatives + start_idx, run_len = _find_first_run(near_zero_mask, int(min_consecutive)) + + if start_idx is not None: + run_indices = np.arange(start_idx, start_idx + run_len) + # Downsample to at most max_points for readability + if len(run_indices) > max_points: + picks_rel = np.linspace(0, len(run_indices) - 1, num=max_points) + pick = run_indices[np.round(picks_rel).astype(int)] + else: + pick = run_indices + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: sustained near-zero derivative region found, points={run_len}" + ) + else: + # Fallback: choose up to max_points with smallest slope in the search region + candidates = np.where(search_mask)[0] + if candidates.size 
== 0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no valid search region for derivative analysis" + ) + return + + if use_relative: + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / (norm_factor if norm_factor > 0 else 1.0) + order_c = np.argsort(deriv_norm[candidates]) + else: + order_c = np.argsort(np.abs(deriv[candidates])) + pick = candidates[order_c[:max_points]] + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no sustained plateau, showing {len(pick)} smallest-slope points" + ) + + # Ensure sorted by x before printing + pick = np.array(sorted(pick.tolist())) + + # Print lines in the requested format + for idx in pick: + dy = deriv[idx] + dy_out = abs(dy) if report_abs else dy + print(f" Problem size={x[idx]:.6g}, dy/dx={dy_out:.6g}") + +def plot_backend(df_backend, backend_label, fig_dir, show_plots, fig_dpi): + """ + For a given backend: + - One figure per Kernel with solid line and markers per (Variant, Tuning). + - Prints derivative-based small-slope points per (Variant, Tuning). + - Saves each figure as PNG. + - Shows figures interactively if requested. + """ + if df_backend.empty: + print(f"\nNo data for backend {backend_label}.") + return + + kernels = sorted(df_backend["Kernel"].dropna().unique()) + + # Unique (Variant, Tuning) combos for color/marker assignments + vt_pairs = sorted( + df_backend[["Variant", "Tuning"]] + .dropna() + .drop_duplicates() + .itertuples(index=False, name=None) + ) + + cmap = plt.cm.tab20 + num_pairs = max(1, len(vt_pairs)) + colors = [cmap(i % cmap.N) for i in range(num_pairs)] + markers = ["o", "s", "^", "D", "v", ">", "<", "P", "X"] # repeat if needed + + style_map = {} + for idx, (variant, tuning) in enumerate(vt_pairs): + color = colors[idx % len(colors)] + marker = markers[idx % len(markers)] + style_map[(variant, tuning)] = (color, marker) + + for kernel in kernels: + df_k = df_backend[df_backend["Kernel"] == kernel] + if df_k.empty: + continue + + fig = plt.figure(figsize=(10, 6)) + + # group by Variant and Tuning to get separate curves + for (variant, tuning), g in df_k.groupby(["Variant", "Tuning"]): + g_sorted = g.sort_values("Problem size") + x = g_sorted["Problem size"].values + y = g_sorted["Mean flops (gigaFLOP per sec.)"].values + + color, marker = style_map.get((variant, tuning), ("black", "o")) + + label = f"{variant} | {tuning}" + + plt.plot( + x, + y, + marker=marker, + linestyle="-", + color=color, + label=label, + ) + + # Derivative-based report of small-slope points + report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, + ) + + plt.xlabel("Problem size") + plt.ylabel("Mean flops (gigaFLOP per sec.)") + + # TITLE MODIFIED: explicitly list backend + plt.title(f"Kernel: {kernel} | Backend: {backend_label}") + + plt.grid(True) + plt.tight_layout() + plt.legend(fontsize="small", bbox_to_anchor=(1.05, 1), loc="upper left") + + # Save figure as PNG + kernel_safe = sanitize_filename(kernel) + backend_safe = sanitize_filename(backend_label) + fname = f"{backend_safe}_Kernel-{kernel_safe}.png" + fig_path = os.path.join(fig_dir, fname) + plt.savefig(fig_path, dpi=fig_dpi, 
bbox_inches="tight") + print(f"[SAVE] Figure saved to: {fig_path}") + + if show_plots: + plt.show() + else: + plt.close(fig) + +# ============= Main logic ============= + +def main(): + ensure_dir(OUTPUT_DIR) + ensure_dir(FIG_DIR) + + files = find_csv_files(ROOT_DIR, GLOB_PATTERNS) + if not files: + print(f"No files matching patterns {GLOB_PATTERNS} found under '{ROOT_DIR}'") + return + + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + + # Verify required columns exist post-normalization, else report and skip + required_any = {"Kernel", "Variant", "Problem size", "Mean flops (gigaFLOP per sec.)"} + if not required_any.issubset(set(df.columns)): + print(f"[SKIP] {path} missing required columns after normalization.") + print(" Columns present:", list(df.columns)) + continue + + dfs.append(df) + + if not dfs: + print("No CSV files could be parsed with required columns.") + return + + combined_df = pd.concat(dfs, ignore_index=True) + + # Basic cleaning + combined_df["Kernel"] = combined_df["Kernel"].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + # Optional kernel filter + if KERNEL_WHITELIST: + wl = [w.lower() for w in KERNEL_WHITELIST] + combined_df = combined_df[ + combined_df["Kernel"].str.lower().apply(lambda k: any(w in k for w in wl)) + ] + if combined_df.empty: + print("After applying KERNEL_WHITELIST, no rows remain.") + return + + # Convert numeric columns + combined_df["Problem size"] = pd.to_numeric(combined_df["Problem size"], errors="coerce") + combined_df["Mean flops (gigaFLOP per sec.)"] = pd.to_numeric( + combined_df["Mean flops (gigaFLOP per sec.)"], errors="coerce" + ) + + # Drop rows without x or y + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=["Problem size", "Mean flops (gigaFLOP per sec.)"]) + dropped = before_drop - len(combined_df) + if dropped > 0: + print(f"[CLEAN] Dropped {dropped} rows with non-numeric Problem size or Mean flops.") + + # Backend classification + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + + # NEW: derive Tuning column + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + # Save concatenated table to CSV, now including Backend and Tuning + ensure_dir(os.path.dirname(COMBINED_CSV_PATH)) + combined_df.to_csv(COMBINED_CSV_PATH, index=False) + print(f"[SAVE] Combined table saved to: {COMBINED_CSV_PATH}") + + # Quick summary to help verify MASS3DPA is present + print("\nKernels discovered:") + print(sorted(combined_df["Kernel"].unique())) + + print("\nCounts by Kernel and Backend:") + summary = ( + combined_df.groupby(["Kernel", "Backend"]) + .size() + .reset_index(name="rows") + .sort_values(["Kernel", "Backend"]) + ) + for _, row in summary.iterrows(): + print(f" Kernel={row['Kernel']}, Backend={row['Backend']}: rows={row['rows']}") + + # Plot aggregated "All" view so kernels appear even if backend classification is Unknown + print("\n[Plot] Generating 'All' plots per kernel...") + plot_backend(combined_df, "All", FIG_DIR, SHOW_PLOTS, FIG_DPI) + + # Plot per requested backends, only if data exists + for b in ["CUDA", "HIP", "Seq", "OpenMP"]: + df_b = combined_df[combined_df["Backend"] == b] + if df_b.empty: + print(f"[Plot] Skipping backend {b}, no rows.") + continue + print(f"\n[Plot] Generating '{b}' plots per kernel...") + plot_backend(df_b, b, FIG_DIR, SHOW_PLOTS, FIG_DPI) + 
+if __name__ == "__main__":
+    main()

From daefd53d6ee599532b83d1fee8ab713d69d24bff Mon Sep 17 00:00:00 2001
From: Rich Hornung
Date: Tue, 13 Jan 2026 12:00:53 -0800
Subject: [PATCH 4/5] Move benchmarking scripts to subdirectory

---
 scripts/{ => benchmarking}/run_kernels.sh              | 0
 scripts/{ => benchmarking}/study_run_kernel_tunings.py | 0
 scripts/{ => benchmarking}/study_run_kernels.py        | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename scripts/{ => benchmarking}/run_kernels.sh (100%)
 rename scripts/{ => benchmarking}/study_run_kernel_tunings.py (100%)
 rename scripts/{ => benchmarking}/study_run_kernels.py (100%)

diff --git a/scripts/run_kernels.sh b/scripts/benchmarking/run_kernels.sh
similarity index 100%
rename from scripts/run_kernels.sh
rename to scripts/benchmarking/run_kernels.sh
diff --git a/scripts/study_run_kernel_tunings.py b/scripts/benchmarking/study_run_kernel_tunings.py
similarity index 100%
rename from scripts/study_run_kernel_tunings.py
rename to scripts/benchmarking/study_run_kernel_tunings.py
diff --git a/scripts/study_run_kernels.py b/scripts/benchmarking/study_run_kernels.py
similarity index 100%
rename from scripts/study_run_kernels.py
rename to scripts/benchmarking/study_run_kernels.py

From f5f8bc7fb6c49cd4485bc3337b9dc06b83330dd0 Mon Sep 17 00:00:00 2001
From: Rich Hornung
Date: Tue, 13 Jan 2026 15:48:31 -0800
Subject: [PATCH 5/5] Add new script to run full benchmark with MPI

---
 .../benchmarking/run_full_benchmark-mpi.sh       | 47 +++++++++++++++++++
 scripts/benchmarking/run_kernels.sh              |  0
 .../benchmarking/study_run_kernel_tunings.py     |  0
 scripts/benchmarking/study_run_kernels.py        |  0
 4 files changed, 47 insertions(+)
 create mode 100755 scripts/benchmarking/run_full_benchmark-mpi.sh
 mode change 100644 => 100755 scripts/benchmarking/run_kernels.sh
 mode change 100644 => 100755 scripts/benchmarking/study_run_kernel_tunings.py
 mode change 100644 => 100755 scripts/benchmarking/study_run_kernels.py

diff --git a/scripts/benchmarking/run_full_benchmark-mpi.sh b/scripts/benchmarking/run_full_benchmark-mpi.sh
new file mode 100755
index 000000000..69aa61b23
--- /dev/null
+++ b/scripts/benchmarking/run_full_benchmark-mpi.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+## Run the benchmark kernels listed below, GPU non-lambda variants only,
+## on 4 MPI ranks, and write the results to the directory named by OUTDIR.
+
+flux alloc -xN1 -t 20 bash -c '
+
+OUTDIR=RPBenchmarkTestMPI
+
+# Collection of problem size factors between 0.5 and 6
+FACTORS=(0.5 1.0 2.0 3.0 4.0 5.0 6.0)
+
+# List of kernels to run
+KERNELS=("CONVECTION3DPA"
+         "DEL_DOT_VEC_2D"
+         "DIFFUSION3DPA"
+         "EDGE3D"
+         "ENERGY"
+         "INTSC_HEXHEX"
+         "INTSC_HEXRECT"
+         "LTIMES"
+         "MASS3DEA"
+         "MASSVEC3DPA"
+         "MATVEC_3D_STENCIL"
+         "NODAL_ACCUMULATION_3D"
+         "VOL3D"
+         "MULTI_REDUCE"
+         "REDUCE_STRUCT"
+         "HALO_EXCHANGE_FUSED")
+
+for KERNEL_NAME in "${KERNELS[@]}"; do
+  echo "Running kernel: $KERNEL_NAME"
+
+  for factor in "${FACTORS[@]}"; do
+    echo "  Running with sizefact = $factor"
+    flux run -xN1 -n4 ./bin/raja-perf.exe \
+      -k "$KERNEL_NAME" \
+      --npasses 1 \
+      --npasses-combiners Average Minimum Maximum \
+      --outdir ${OUTDIR} \
+      --outfile "${KERNEL_NAME}_factor_${factor}" \
+      --sizefact "$factor" \
+      --warmup-perfrun-same \
+      -ev Seq Lambda
+  done
+done
+'
diff --git a/scripts/benchmarking/run_kernels.sh b/scripts/benchmarking/run_kernels.sh
old mode 100644
new mode 100755
diff --git a/scripts/benchmarking/study_run_kernel_tunings.py b/scripts/benchmarking/study_run_kernel_tunings.py
old mode 100644
new mode 100755
diff --git a/scripts/benchmarking/study_run_kernels.py b/scripts/benchmarking/study_run_kernels.py
old mode 100644
new mode 100755
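
Note on the analysis scripts (not part of the patches above): the plateau
detection in report_near_zero_derivative_points() can be hard to follow from
the diff alone. The short sketch below is a minimal, self-contained
illustration of the same idea: smooth y, take dy/dx with np.gradient,
normalize the slope by y_range/x_range, restrict the search to the near-peak
region, and look for the first sustained run of small slopes. The synthetic
curve is made up for illustration; the 0.03 threshold, 0.9 peak fraction, and
run length of 3 mirror DERIV_EPS_REL, DERIV_MIN_FRAC_OF_MAX_Y, and
DERIV_MIN_CONSEC in the scripts but are otherwise assumptions, not results
from a real benchmark run.

    import numpy as np

    # Synthetic throughput curve: rises with problem size, then saturates.
    x = np.linspace(1e5, 1e7, 25)            # problem sizes (illustrative)
    y = 900.0 * (1.0 - np.exp(-x / 1e6))     # GFLOP/s-like saturation curve

    # 1) Light smoothing, as DERIV_SMOOTH_WINDOW does in the scripts.
    window = 3
    y_sm = np.convolve(y, np.ones(window) / window, mode="same")

    # 2) Slope, normalized so the threshold is dimensionless (cf. DERIV_EPS_REL).
    deriv = np.gradient(y_sm, x)
    norm = (y_sm.max() - y_sm.min()) / (x.max() - x.min())
    small = np.abs(deriv) / norm <= 0.03

    # 3) Only search near the peak (cf. DERIV_MIN_FRAC_OF_MAX_Y).
    small &= y_sm >= 0.9 * y_sm.max()

    # 4) First sustained run of >= 3 small-slope points marks the plateau
    #    (cf. DERIV_MIN_CONSEC and _find_first_run in the scripts).
    run = 0
    for i, flag in enumerate(small):
        run = run + 1 if flag else 0
        if run >= 3:
            start = i - run + 1
            print(f"Plateau starts near problem size {x[start]:.3g}")
            break

Running the sketch prints a single plateau point near the knee of the
synthetic curve, which is the same kind of "problem size where throughput
stops improving" that the [DERIV] lines from study_run_kernels.py and
study_run_kernel_tunings.py report for each kernel, variant, and tuning.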