From deadd64db82a8661e588438171b992c521eca4bd Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 19 Jan 2026 14:46:34 -0800 Subject: [PATCH 1/5] add script to compare against compilers --- .../fem_benchmarking/compiler_comparison.py | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 scripts/fem_benchmarking/compiler_comparison.py diff --git a/scripts/fem_benchmarking/compiler_comparison.py b/scripts/fem_benchmarking/compiler_comparison.py new file mode 100644 index 000000000..a9eafa8d5 --- /dev/null +++ b/scripts/fem_benchmarking/compiler_comparison.py @@ -0,0 +1,241 @@ +import os +import re +from pathlib import Path + +import pandas as pd + + +def find_build_folder(path: Path) -> str: + """ + Given a file path, return the first parent directory whose name starts with 'build_'. + If none is found, return an empty string. + """ + for parent in path.parents: + if parent.name.startswith("build_"): + return parent.name + return "" + + +def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """ + Normalize and deduplicate columns, then merge any duplicate Kernel/Tuning columns + into single columns named 'Kernel' and 'Tuning'. + """ + # Strip leading/trailing whitespace from column names + df = df.copy() + df.columns = [c.strip() for c in df.columns] + + # Merge multiple Kernel columns into one + kernel_cols = [c for c in df.columns if c.lower().startswith("kernel")] + if kernel_cols: + # Create a single 'Kernel' column, taking first non null / non empty value across duplicates + df["Kernel"] = df[kernel_cols].bfill(axis=1).iloc[:, 0] + # Drop all original kernel columns except the unified one + for c in kernel_cols: + if c != "Kernel": + df.drop(columns=c, inplace=True, errors="ignore") + + # Merge multiple Tuning columns into one + tuning_cols = [c for c in df.columns if c.lower().startswith("tuning")] + if tuning_cols: + df["Tuning"] = df[tuning_cols].bfill(axis=1).iloc[:, 0] + for c in tuning_cols: + if c != "Tuning": + df.drop(columns=c, inplace=True, errors="ignore") + + return df + + +def collect_kernel_run_data(root_dir, output_csv=None, verbose=True): + """ + Walk root_dir recursively, find *kernel-run-data.csv files, read and + aggregate them into one DataFrame with: + - first line of each file skipped + - duplicate Kernel/Tuning columns merged into single 'Kernel' and 'Tuning' + - an additional BuildFolder column. + + Parameters + ---------- + root_dir : str or Path + Directory to search recursively. + output_csv : str or Path or None, optional + If provided, the combined DataFrame is written to this CSV path. + If None, no file is written. + verbose : bool, optional + If True, prints progress information. + + Returns + ------- + pandas.DataFrame + Combined DataFrame of all kernel-run-data.csv files found. 
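+
+    Examples
+    --------
+    A minimal usage sketch (the directory name below is hypothetical):
+
+        df = collect_kernel_run_data("build_gcc", output_csv=None,
+                                     verbose=False)
+        print(df["BuildFolder"].unique())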
+ """ + root_dir = Path(root_dir).resolve() + if output_csv is not None: + output_csv = Path(output_csv).resolve() + + all_rows = [] + pattern = re.compile(r"kernel-run-data\.csv$") + + if verbose: + print(f"Scanning under: {root_dir}") + + for dirpath, dirnames, filenames in os.walk(root_dir): + for filename in filenames: + if pattern.search(filename): + file_path = Path(dirpath) / filename + build_folder = find_build_folder(file_path) + + if verbose: + print(f"Found: {file_path}") + if not build_folder: + print(" Warning: no build_ folder found in path, leaving BuildFolder empty") + + try: + # Skip the first line, then parse CSV header and data + df = pd.read_csv( + file_path, + skiprows=1, # skip first non CSV line + skipinitialspace=True + ) + except Exception as e: + if verbose: + print(f" Error reading {file_path}: {e}") + continue + + # Normalize and merge duplicate Kernel/Tuning columns + df = _normalize_columns(df) + + # Add build folder column + df["BuildFolder"] = build_folder + all_rows.append(df) + + if not all_rows: + if verbose: + print("No kernel-run-data.csv files found. Returning empty DataFrame.") + return pd.DataFrame() + + combined = pd.concat(all_rows, ignore_index=True) + + if output_csv is not None: + combined.to_csv(output_csv, index=False) + if verbose: + print(f"Written combined CSV to: {output_csv}") + + return combined + +# Search a specific directory and also save to a merged CSV +df = collect_kernel_run_data( + ".", + output_csv="kernel-run-data-merged.csv", + verbose=True, +) + + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pathlib import Path + +metric_col = "Mean time per rep (sec.)" + +# Check required columns exist +required_cols = ["Kernel", "Variant", "Tuning", "BuildFolder", metric_col] +missing = [c for c in required_cols if c not in df.columns] +if missing: + raise ValueError(f"Missing required columns in df: {missing}") + +df_plot = df.copy() + +# 1) Create a merged Variant + Tuning label +df_plot["VariantTuning"] = ( + df_plot["Variant"].astype(str).str.strip() + + " | " + + df_plot["Tuning"].astype(str).str.strip() +) + +# 2) Aggregate metric by Kernel, VariantTuning, BuildFolder +grouped = ( + df_plot.groupby(["Kernel", "VariantTuning", "BuildFolder"], dropna=False)[metric_col] + .mean() + .reset_index() +) + +kernels = sorted(grouped["Kernel"].dropna().unique()) +build_folders = sorted(grouped["BuildFolder"].dropna().unique()) + +output_dir = Path("kernel_plots_for_ppt_vertical") +output_dir.mkdir(exist_ok=True) + +plt.rcParams.update({ + "figure.figsize": (14, 7), + "axes.titlesize": 18, + "axes.labelsize": 14, + "xtick.labelsize": 11, + "ytick.labelsize": 12, + "legend.fontsize": 12, +}) + +for kernel in kernels: + kernel_df = grouped[grouped["Kernel"] == kernel].copy() + + variant_tunings = sorted(kernel_df["VariantTuning"].dropna().unique()) + n_vt = len(variant_tunings) + if n_vt == 0: + continue + + x_idx = np.arange(n_vt) + n_folders = len(build_folders) + if n_folders == 0: + continue + + total_width = 0.8 + bar_width = total_width / n_folders + + fig, ax = plt.subplots() + + for i, folder in enumerate(build_folders): + sub = kernel_df[kernel_df["BuildFolder"] == folder] + + y_vals = [] + for vt in variant_tunings: + row = sub[sub["VariantTuning"] == vt] + if not row.empty: + y_vals.append(row[metric_col].iloc[0]) + else: + y_vals.append(np.nan) + + offset = (i - n_folders / 2) * bar_width + bar_width / 2 + ax.bar( + x_idx + offset, + y_vals, + width=bar_width, + label=folder, + ) + + 
ax.set_xticks(x_idx) + ax.set_xticklabels(variant_tunings, rotation=45, ha="right") + ax.set_ylabel(metric_col) + ax.set_xlabel("Variant | Tuning") + + # Add some extra padding under the title + ax.set_title( + f"Kernel: {kernel} - {metric_col}", + pad=20, # increase this if you want more space + ) + + ax.grid(axis="y", linestyle="--", alpha=0.5) + ax.set_axisbelow(True) + + ax.legend(title="BuildFolder", bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0.) + + # Increase the top margin a bit so bars and limits are not clipped by the title + fig.tight_layout() + plt.subplots_adjust(top=0.88) # smaller than 1.0; reduce this number for more space + + safe_kernel_name = str(kernel) + out_path = output_dir / f"kernel_{safe_kernel_name}.png" + fig.savefig(out_path, dpi=200) + + plt.show() + +print(f"Vertical bar plots saved in: {output_dir.resolve()}") From 23f765bacfca0b88f4241e2e9f2a41f4e5aa4a4f Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 19 Jan 2026 14:57:35 -0800 Subject: [PATCH 2/5] fix output name --- scripts/fem_benchmarking/compiler_comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fem_benchmarking/compiler_comparison.py b/scripts/fem_benchmarking/compiler_comparison.py index a9eafa8d5..6adcd5ced 100644 --- a/scripts/fem_benchmarking/compiler_comparison.py +++ b/scripts/fem_benchmarking/compiler_comparison.py @@ -232,7 +232,7 @@ def collect_kernel_run_data(root_dir, output_csv=None, verbose=True): fig.tight_layout() plt.subplots_adjust(top=0.88) # smaller than 1.0; reduce this number for more space - safe_kernel_name = str(kernel) + safe_kernel_name = str(kernel).replace(" ", "") out_path = output_dir / f"kernel_{safe_kernel_name}.png" fig.savefig(out_path, dpi=200) From 58cf4fc662ccd59feeac3d5e253ebe30dd27fb16 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 19 Jan 2026 15:05:55 -0800 Subject: [PATCH 3/5] make subplots --- .../fem_benchmarking/compiler_comparison.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/scripts/fem_benchmarking/compiler_comparison.py b/scripts/fem_benchmarking/compiler_comparison.py index 6adcd5ced..c0763f2c9 100644 --- a/scripts/fem_benchmarking/compiler_comparison.py +++ b/scripts/fem_benchmarking/compiler_comparison.py @@ -239,3 +239,111 @@ def collect_kernel_run_data(root_dir, output_csv=None, verbose=True): plt.show() print(f"Vertical bar plots saved in: {output_dir.resolve()}") + + +#Plot subfigures +fig_idx = 1 + +for kernel_chunk in chunk_list(list(kernels), PLOTS_PER_FIG): + fig, axes = plt.subplots(N_ROWS, N_COLS, squeeze=False) + fig.set_size_inches(*PPT_FIGSIZE) + + fig.suptitle( + f"Kernel Performance Comparison - {metric_col}", + fontsize=20, + y=0.97, + fontweight="bold", + ) + + shared_handles = None + shared_labels = None + + for ax_idx, kernel in enumerate(kernel_chunk): + row = ax_idx // N_COLS + col = ax_idx % N_COLS + ax = axes[row][col] + + kernel_df = grouped[grouped["Kernel"] == kernel].copy() + variant_tunings = sorted(kernel_df["VariantTuning"].dropna().unique()) + n_vt = len(variant_tunings) + if n_vt == 0: + ax.set_visible(False) + continue + + x_idx = np.arange(n_vt) + n_folders = len(build_folders) + + total_width = 0.8 + bar_width = total_width / max(n_folders, 1) + cmap = plt.get_cmap("tab10") + + for i, folder in enumerate(build_folders): + sub = kernel_df[kernel_df["BuildFolder"] == folder] + + y_vals = [] + for vt in variant_tunings: + row_sel = sub[sub["VariantTuning"] == vt] + if not row_sel.empty: + 
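+                    # Take the pre-aggregated mean for this (kernel,
+                    # variant+tuning, folder) pair; combinations with no data
+                    # fall through to NaN below so their bar is simply omitted.
+                    # NOTE: this cell assumes chunk_list, PLOTS_PER_FIG,
+                    # N_ROWS, N_COLS, PPT_FIGSIZE and PPT_DPI are defined in
+                    # an earlier notebook cell, e.g. N_ROWS = N_COLS = 2,
+                    # PLOTS_PER_FIG = 4, PPT_FIGSIZE = (16, 9), PPT_DPI = 200,
+                    # and chunk_list(xs, n) yielding successive n-item slices.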
y_vals.append(row_sel[metric_col].iloc[0]) + else: + y_vals.append(np.nan) + + offset = (i - n_folders / 2) * bar_width + bar_width / 2 + color = cmap(i % 10) + + ax.bar( + x_idx + offset, + y_vals, + width=bar_width, + label=folder, + color=color, + alpha=0.85, + edgecolor="black", + linewidth=0.3, + ) + + ax.set_xticks(x_idx) + ax.set_xticklabels(variant_tunings, rotation=45, ha="right") + ax.set_ylabel(metric_col) + ax.set_xlabel("Variant | Tuning") + ax.set_title(f"Kernel: {kernel}", pad=14, fontweight="semibold") + ax.grid(axis="y", linestyle="--", alpha=0.35) + ax.set_axisbelow(True) + + ymin, ymax = ax.get_ylim() + ax.set_ylim(bottom=0, top=ymax * 1.08 if ymax > 0 else 1) + + if shared_handles is None or shared_labels is None: + shared_handles, shared_labels = ax.get_legend_handles_labels() + + total_axes = N_ROWS * N_COLS + used_axes = len(kernel_chunk) + if used_axes < total_axes: + for empty_idx in range(used_axes, total_axes): + row = empty_idx // N_COLS + col = empty_idx % N_COLS + axes[row][col].set_visible(False) + + fig.tight_layout(rect=[0.03, 0.12, 0.97, 0.90]) + + if shared_handles and shared_labels: + fig.legend( + shared_handles, + shared_labels, + title="BuildFolder", + loc="lower center", + ncol=min(len(build_folders), 5), + bbox_to_anchor=(0.5, 0.04), + frameon=False, + ) + + out_path = output_dir / f"kernels_4up_page_{fig_idx}.png" + fig.savefig(out_path, dpi=PPT_DPI) + + # show instead of close, so you see it in the notebook + plt.show() + + print(f"Saved slide figure: {out_path}") + fig_idx += 1 + +print(f"All 4-up PPT-ready figures saved in: {output_dir.resolve()}") From 710f3bf06b0de837939d09e24b05a0799dea6d58 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 19 Jan 2026 17:25:55 -0800 Subject: [PATCH 4/5] scripts for throughput studies --- scripts/fem_benchmarking/throughput.py | 698 +++++++++++++++++++++++++ 1 file changed, 698 insertions(+) create mode 100644 scripts/fem_benchmarking/throughput.py diff --git a/scripts/fem_benchmarking/throughput.py b/scripts/fem_benchmarking/throughput.py new file mode 100644 index 000000000..b1c85aa91 --- /dev/null +++ b/scripts/fem_benchmarking/throughput.py @@ -0,0 +1,698 @@ +import os +import re +from pathlib import Path + +import pandas as pd + + +def find_build_folder(path: Path) -> str: + """ + Given a file path, return the first parent directory whose name starts with 'build_'. + If none is found, return an empty string. + """ + for parent in path.parents: + if parent.name.startswith("build_"): + return parent.name + return "" + + +def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """ + Normalize and deduplicate columns, then merge any duplicate Kernel/Tuning columns + into single columns named 'Kernel' and 'Tuning'. 
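+
+    For example, when an input file repeats a header name, pandas exposes the
+    duplicates as 'Kernel' and 'Kernel.1'; both match the 'kernel' prefix and
+    are collapsed into one 'Kernel' column, keeping the first non-null value
+    per row (bfill across the duplicate columns).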
+ """ + # Strip leading/trailing whitespace from column names + df = df.copy() + df.columns = [c.strip() for c in df.columns] + + # Merge multiple Kernel columns into one + kernel_cols = [c for c in df.columns if c.lower().startswith("kernel")] + if kernel_cols: + # Create a single 'Kernel' column, taking first non null / non empty value across duplicates + df["Kernel"] = df[kernel_cols].bfill(axis=1).iloc[:, 0] + # Drop all original kernel columns except the unified one + for c in kernel_cols: + if c != "Kernel": + df.drop(columns=c, inplace=True, errors="ignore") + + # Merge multiple Tuning columns into one + tuning_cols = [c for c in df.columns if c.lower().startswith("tuning")] + if tuning_cols: + df["Tuning"] = df[tuning_cols].bfill(axis=1).iloc[:, 0] + for c in tuning_cols: + if c != "Tuning": + df.drop(columns=c, inplace=True, errors="ignore") + + return df + + +def collect_kernel_run_data(root_dir, output_csv=None, verbose=True): + """ + Walk root_dir recursively, find *kernel-run-data.csv files, read and + aggregate them into one DataFrame with: + - first line of each file skipped + - duplicate Kernel/Tuning columns merged into single 'Kernel' and 'Tuning' + - an additional BuildFolder column. + Parameters + ---------- + root_dir : str or Path + Directory to search recursively. + output_csv : str or Path or None, optional + If provided, the combined DataFrame is written to this CSV path. + If None, no file is written. + verbose : bool, optional + If True, prints progress information. + Returns + ------- + pandas.DataFrame + Combined DataFrame of all kernel-run-data.csv files found. + """ + root_dir = Path(root_dir).resolve() + if output_csv is not None: + output_csv = Path(output_csv).resolve() + + all_rows = [] + pattern = re.compile(r"kernel-run-data\.csv$") + + if verbose: + print(f"Scanning under: {root_dir}") + + for dirpath, dirnames, filenames in os.walk(root_dir): + for filename in filenames: + if pattern.search(filename): + file_path = Path(dirpath) / filename + build_folder = find_build_folder(file_path) + + if verbose: + print(f"Found: {file_path}") + if not build_folder: + print(" Warning: no build_ folder found in path, leaving BuildFolder empty") + + try: + # Skip the first line, then parse CSV header and data + df = pd.read_csv( + file_path, + skiprows=1, # skip first non CSV line + skipinitialspace=True + ) + except Exception as e: + if verbose: + print(f" Error reading {file_path}: {e}") + continue + + # Normalize and merge duplicate Kernel/Tuning columns + df = _normalize_columns(df) + + # Add build folder column + df["BuildFolder"] = build_folder + all_rows.append(df) + + if not all_rows: + if verbose: + print("No kernel-run-data.csv files found. Returning empty DataFrame.") + return pd.DataFrame() + + combined = pd.concat(all_rows, ignore_index=True) + + if output_csv is not None: + combined.to_csv(output_csv, index=False) + if verbose: + print(f"Written combined CSV to: {output_csv}") + + return combined + +# Search a specific directory and also save to a merged CSV +df = collect_kernel_run_data( + ".", + output_csv="kernel-run-data-merged.csv", + verbose=True, +) + +%matplotlib inline + +from pathlib import Path +from typing import Optional, Union + +import pandas as pd +import matplotlib.pyplot as plt + + +# ====================== STYLE FOR POWERPOINT ====================== + +def set_ppt_style(): + """ + Configure matplotlib defaults for PowerPoint ready figures. 
+ """ + plt.style.use("default") # clean base + + plt.rcParams.update({ + "figure.figsize": (8, 5), # works well on slides + "figure.dpi": 150, # higher DPI for clarity + "savefig.dpi": 300, # high quality export + "axes.titlesize": 16, + "axes.labelsize": 14, + "xtick.labelsize": 12, + "ytick.labelsize": 12, + "legend.fontsize": 11, + "lines.linewidth": 2.0, + "lines.markersize": 6, + "axes.grid": True, + "grid.linestyle": "--", + "grid.alpha": 0.3, + "figure.facecolor": "white", + "axes.facecolor": "white", + "font.family": "sans-serif", + "font.sans-serif": ["Arial", "DejaVu Sans", "Liberation Sans"], + }) + + +# ====================== NORMALIZATION HELPERS ====================== + +def _merge_prefixed_columns(df: pd.DataFrame, prefix: str, unified_name: str) -> None: + cols = [c for c in df.columns if c.lower().startswith(prefix.lower())] + if not cols: + return + + df[cols] = df[cols].replace("", pd.NA) + df[unified_name] = df[cols].bfill(axis=1).iloc[:, 0] + + for c in cols: + if c != unified_name: + df.drop(columns=c, inplace=True, errors="ignore") + + +def normalize_kernel_variant_tuning(df: pd.DataFrame) -> pd.DataFrame: + df = df.copy() + df.columns = [c.strip() for c in df.columns] + + _merge_prefixed_columns(df, "kernel", "Kernel") + _merge_prefixed_columns(df, "variant", "Variant") + _merge_prefixed_columns(df, "tuning", "Tuning") + + return df + + +# ====================== TRANSFORM CSV ====================== + +def transform_and_save( + input_csv: Union[str, Path], + output_csv: Union[str, Path], + build_folder_col: str = "BuildFolder", +) -> pd.DataFrame: + df = pd.read_csv(input_csv) + print("Original columns:", df.columns.tolist()) + + df = normalize_kernel_variant_tuning(df) + + # Create merged "Variant+Tuning" column + df["Variant"] = df.get("Variant", "").fillna("").astype(str).str.strip() + df["Tuning"] = df.get("Tuning", "").fillna("").astype(str).str.strip() + df["Variant+Tuning"] = (df["Variant"] + " - " + df["Tuning"]).str.strip(" -") + + # Keep columns up to and including build_folder_col, plus Variant+Tuning + if build_folder_col not in df.columns: + raise KeyError(f"'{build_folder_col}' column not found. 
Columns are: {df.columns.tolist()}") + + cols = df.columns.tolist() + build_idx = cols.index(build_folder_col) + keep_cols = cols[:build_idx + 1] + ["Variant+Tuning"] + + df_result = df[keep_cols].copy() + print("Transformed columns:", df_result.columns.tolist()) + + output_csv = Path(output_csv) + df_result.to_csv(output_csv, index=False) + print(f"Saved transformed CSV to: {output_csv.resolve()}") + + return df_result + + +# ====================== THROUGHPUT ====================== + +def add_throughput( + df: pd.DataFrame, + size_col: str = "Problem size", + time_col: str = "Mean time per rep (sec.)", +) -> pd.DataFrame: + df = df.copy() + + if size_col not in df.columns: + raise KeyError(f"Required column '{size_col}' not found.") + if time_col not in df.columns: + raise KeyError(f"Required column '{time_col}' not found.") + + df[size_col] = pd.to_numeric(df[size_col], errors="coerce") + df[time_col] = pd.to_numeric(df[time_col], errors="coerce") + + df["Throughput"] = df[size_col] / df[time_col] + return df + + +# ====================== PLOTTING (POWERPOINT FRIENDLY) ====================== + +def plot_throughput_by_kernel_variant_tuning_ppt( + df: pd.DataFrame, + output_dir: Union[str, Path] = ".", + base_title: Optional[str] = "Throughput vs Problem size", + size_col: str = "Problem size", + time_col: str = "Mean time per rep (sec.)", + log_x: bool = False, + log_y: bool = False, + aggregate_duplicates: bool = True, +) -> None: + set_ppt_style() + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + df = add_throughput(df, size_col=size_col, time_col=time_col) + + if "Kernel" not in df.columns: + raise KeyError("'Kernel' column not found.") + if "Variant+Tuning" not in df.columns: + raise KeyError("'Variant+Tuning' column not found.") + + if aggregate_duplicates: + group_cols = ["Kernel", "Variant+Tuning", size_col] + agg_dict = {"Throughput": "mean"} + if time_col in df.columns: + agg_dict[time_col] = "mean" + + df_plot = df.groupby(group_cols, as_index=False).agg(agg_dict) + else: + df_plot = df.copy() + + df_plot = df_plot.dropna(subset=["Kernel", size_col, "Throughput"]) + df_plot = df_plot.sort_values(by=["Kernel", "Variant+Tuning", size_col]) + + if df_plot.empty: + print("No data to plot after filtering.") + return + + kernels = df_plot["Kernel"].unique() + print("Kernels found:", kernels) + + # A color palette that works well on slides + color_cycle = plt.cm.tab10.colors + + for k_idx, kernel in enumerate(kernels): + df_kernel = df_plot[df_plot["Kernel"] == kernel] + if df_kernel.empty: + continue + + fig, ax = plt.subplots() + + for s_idx, (vt_label, sub) in enumerate(df_kernel.groupby("Variant+Tuning")): + if sub.empty: + continue + color = color_cycle[s_idx % len(color_cycle)] + ax.plot( + sub[size_col], + sub["Throughput"], + marker="o", + linestyle="-", + label=str(vt_label), + color=color, + ) + + ax.set_xlabel(size_col) + ax.set_ylabel("Throughput (DoFs per second)") + + # Use a figure level title, positioned higher so there is space + title = base_title or "Throughput vs Problem size" + fig.suptitle( + f"{title} - Kernel: {kernel}", + y=0.98, # push the main title up (0.98 is near the top) + fontsize=16, + ) + + if log_x: + ax.set_xscale("log") + if log_y: + ax.set_yscale("log") + + # Legend outside plot to avoid clutter on slides + ax.legend( + title="Variant+Tuning", + loc="center left", + bbox_to_anchor=(1.02, 0.5), + borderaxespad=0.0, + ) + + # Let tight_layout pack the axes, then keep some room for suptitle + fig.tight_layout() + # 
Reserve a bit of space at the top so suptitle is not clipped + fig.subplots_adjust(top=0.90) + + # Save high resolution PNG per kernel + safe_kernel = str(kernel).replace(" ", "") + out_path = output_dir / f"ppt_plot_{safe_kernel}.png" + fig.savefig(out_path, bbox_inches="tight") + print(f"Saved PowerPoint ready plot: {out_path}") + + plt.show() + + +# ====================== RUN THE WORKFLOW ====================== + +INPUT_CSV = "kernel-run-data-merged.csv" +OUTPUT_CSV = "kernel-run-data-merged.csv" # overwrite; change if you want a separate file + +try: + # 1) Transform CSV (keep up to BuildFolder + Variant+Tuning) + df_transformed = transform_and_save(INPUT_CSV, OUTPUT_CSV, build_folder_col="BuildFolder") + + # 2) Plot with PPT ready styling + plot_throughput_by_kernel_variant_tuning_ppt( + df_transformed, + output_dir="ppt_plots", # folder where PNGs are stored + base_title="Kernel throughput comparison", + size_col="Problem size", + time_col="Mean time per rep (sec.)", + log_x=False, + log_y=False, + aggregate_duplicates=True, + ) + +except Exception as e: + print("Error:", e) + + +#subfigures +%matplotlib inline + +from pathlib import Path +from typing import Optional, Union + +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +# ====================== STYLE FOR POWERPOINT ====================== + +def set_ppt_style(): + """ + Configure matplotlib defaults for PowerPoint ready figures. + """ + plt.style.use("default") + + plt.rcParams.update({ + "figure.figsize": (10, 4.5), + "figure.dpi": 150, + "savefig.dpi": 300, + "axes.titlesize": 16, + "axes.labelsize": 14, + "xtick.labelsize": 12, + "ytick.labelsize": 12, + "legend.fontsize": 11, + "lines.linewidth": 2.0, + "lines.markersize": 6, + "axes.grid": True, + "grid.linestyle": "--", + "grid.alpha": 0.3, + "figure.facecolor": "white", + "axes.facecolor": "white", + "font.family": "sans-serif", + "font.sans-serif": ["Arial", "DejaVu Sans", "Liberation Sans"], + }) + + +# ====================== NORMALIZATION HELPERS ====================== + +def _merge_prefixed_columns(df: pd.DataFrame, prefix: str, unified_name: str) -> None: + cols = [c for c in df.columns if c.lower().startswith(prefix.lower())] + if not cols: + return + + df[cols] = df[cols].replace("", pd.NA) + df[unified_name] = df[cols].bfill(axis=1).iloc[:, 0] + + for c in cols: + if c != unified_name: + df.drop(columns=c, inplace=True, errors="ignore") + + +def normalize_kernel_variant_tuning(df: pd.DataFrame) -> pd.DataFrame: + df = df.copy() + df.columns = [c.strip() for c in df.columns] + + _merge_prefixed_columns(df, "kernel", "Kernel") + _merge_prefixed_columns(df, "variant", "Variant") + _merge_prefixed_columns(df, "tuning", "Tuning") + + return df + + +# ====================== TRANSFORM CSV ====================== + +def transform_and_save( + input_csv: Union[str, Path], + output_csv: Union[str, Path], + build_folder_col: str = "BuildFolder", +) -> pd.DataFrame: + df = pd.read_csv(input_csv) + print("Original columns:", df.columns.tolist()) + + df = normalize_kernel_variant_tuning(df) + + df["Variant"] = df.get("Variant", "").fillna("").astype(str).str.strip() + df["Tuning"] = df.get("Tuning", "").fillna("").astype(str).str.strip() + df["Variant+Tuning"] = (df["Variant"] + " - " + df["Tuning"]).str.strip(" -") + + if build_folder_col not in df.columns: + raise KeyError(f"'{build_folder_col}' column not found. 
Columns are: {df.columns.tolist()}") + + cols = df.columns.tolist() + build_idx = cols.index(build_folder_col) + keep_cols = cols[:build_idx + 1] + ["Variant+Tuning"] + + df_result = df[keep_cols].copy() + print("Transformed columns:", df_result.columns.tolist()) + + output_csv = Path(output_csv) + df_result.to_csv(output_csv, index=False) + print(f"Saved transformed CSV to: {output_csv.resolve()}") + + return df_result + + +# ====================== THROUGHPUT ====================== + +def add_throughput( + df: pd.DataFrame, + size_col: str = "Problem size", + time_col: str = "Mean time per rep (sec.)", +) -> pd.DataFrame: + df = df.copy() + + if size_col not in df.columns: + raise KeyError(f"Required column '{size_col}' not found.") + if time_col not in df.columns: + raise KeyError(f"Required column '{time_col}' not found.") + + df[size_col] = pd.to_numeric(df[size_col], errors="coerce") + df[time_col] = pd.to_numeric(df[time_col], errors="coerce") + + df["Throughput"] = df[size_col] / df[time_col] + return df + + +# ====================== PLOTTING (POWERPOINT FRIENDLY) ====================== + +def plot_throughput_by_kernel_variant_tuning_ppt( + df: pd.DataFrame, + output_dir: Union[str, Path] = ".", + base_title: Optional[str] = "Throughput vs Problem size", + size_col: str = "Problem size", + time_col: str = "Mean time per rep (sec.)", + log_x: bool = False, + log_y: bool = False, + aggregate_duplicates: bool = True, +) -> None: + """ + Plot throughput vs problem size. + + - First N-2 kernels: 1 row x 2 columns, two kernels per figure. + - Last two kernels: each gets its own separate figure (1 subplot). + - Subplots have titles "Kernel: ..."; no figure level titles. + - Legend is outside the plot, bottom right. + """ + set_ppt_style() + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + df = add_throughput(df, size_col=size_col, time_col=time_col) + + if "Kernel" not in df.columns: + raise KeyError("'Kernel' column not found.") + if "Variant+Tuning" not in df.columns: + raise KeyError("'Variant+Tuning' column not found.") + + if aggregate_duplicates: + group_cols = ["Kernel", "Variant+Tuning", size_col] + agg_dict = {"Throughput": "mean"} + if time_col in df.columns: + agg_dict[time_col] = "mean" + + df_plot = df.groupby(group_cols, as_index=False).agg(agg_dict) + else: + df_plot = df.copy() + + df_plot = df_plot.dropna(subset=["Kernel", size_col, "Throughput"]) + df_plot = df_plot.sort_values(by=["Kernel", "Variant+Tuning", size_col]) + + if df_plot.empty: + print("No data to plot after filtering.") + return + + kernels = df_plot["Kernel"].unique().tolist() + print("Kernels found:", kernels) + + color_cycle = plt.cm.tab10.colors + n_kernels = len(kernels) + + if n_kernels == 0: + return + + if n_kernels > 2: + all_but_last_two = kernels[:-2] + last_two = kernels[-2:] + elif n_kernels == 2: + all_but_last_two = [] + last_two = kernels + else: + all_but_last_two = [] + last_two = kernels + + def plot_kernel_list_to_figure(kernel_list, figure_suffix: str): + """ + Plot the given kernels in a single figure (1 row x up to 2 columns). + Legend is outside bottom right. 
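+
+        For example, with five kernels K1..K5 overall, this helper is called
+        with [K1, K2], then [K3] (the tail of the N-2 "block" kernels), and
+        finally with [K4] and [K5] individually.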
+ """ + if not kernel_list: + return + + n_subplots = len(kernel_list) + ncols = min(2, n_subplots) + nrows = int(np.ceil(n_subplots / 2)) + + fig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex=False, sharey=False) + + if isinstance(axes, np.ndarray): + axes = list(np.ravel(axes)) + else: + axes = [axes] + + used_axes = 0 + + for k_idx, kernel in enumerate(kernel_list): + ax = axes[k_idx] + used_axes += 1 + + df_kernel = df_plot[df_plot["Kernel"] == kernel] + if df_kernel.empty: + continue + + for s_idx, (vt_label, sub) in enumerate(df_kernel.groupby("Variant+Tuning")): + if sub.empty: + continue + color = color_cycle[s_idx % len(color_cycle)] + ax.plot( + sub[size_col], + sub["Throughput"], + marker="o", + linestyle="-", + label=str(vt_label), + color=color, + ) + + ax.set_xlabel(size_col) + ax.set_ylabel("Throughput (DoFs per second)") + ax.set_title( + f"Kernel: {kernel}", + fontsize=14, + pad=15, + ) + + if log_x: + ax.set_xscale("log") + if log_y: + ax.set_yscale("log") + + ax.grid(True) + + for ax in axes[used_axes:]: + fig.delaxes(ax) + + # Collect legend entries from all used axes + all_handles = [] + all_labels = [] + for ax in axes[:used_axes]: + h, l = ax.get_legend_handles_labels() + all_handles.extend(h) + all_labels.extend(l) + + seen = set() + uniq_handles = [] + uniq_labels = [] + for h, l in zip(all_handles, all_labels): + if l not in seen: + seen.add(l) + uniq_handles.append(h) + uniq_labels.append(l) + + fig.tight_layout() + + if uniq_handles: + fig.legend( + uniq_handles, + uniq_labels, + title="Variant+Tuning", + loc="lower right", + bbox_to_anchor=(1.0, -0.02), # outside bottom right + borderaxespad=0.0, + ncol=2, # adjust columns if you have many entries + ) + + # Give space below for the legend + fig.subplots_adjust(bottom=0.25) + + safe_kernels = "_".join(str(k).replace(" ", "") for k in kernel_list) + out_path = output_dir / f"ppt_plot_{figure_suffix}_{safe_kernels}.png" + fig.savefig(out_path, bbox_inches="tight") + print(f"Saved PowerPoint ready plot: {out_path}") + + plt.show() + + # First N-2 kernels in 1x2 blocks + for i in range(0, len(all_but_last_two), 2): + chunk = all_but_last_two[i:i + 2] + if chunk: + plot_kernel_list_to_figure(chunk, figure_suffix="block") + + # Last two kernels, each its own figure + for kernel in last_two: + plot_kernel_list_to_figure([kernel], figure_suffix="single") + + +# ====================== RUN THE WORKFLOW ====================== + +INPUT_CSV = "kernel-run-data-merged.csv" +OUTPUT_CSV = "kernel-run-data-merged.csv" + +try: + df_transformed = transform_and_save(INPUT_CSV, OUTPUT_CSV, build_folder_col="BuildFolder") + + plot_throughput_by_kernel_variant_tuning_ppt( + df_transformed, + output_dir="ppt_plots", + base_title="Kernel throughput comparison", + size_col="Problem size", + time_col="Mean time per rep (sec.)", + log_x=False, + log_y=False, + aggregate_duplicates=True, + ) + +except Exception as e: + print("Error:", e) From 51a78e99e8a82d82e06c3165e39c9ff16ba76347 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 19 Jan 2026 18:47:35 -0800 Subject: [PATCH 5/5] FEM kernel benchmarking --- scripts/fem_benchmarking/run_kernels.sh | 34 +++++++++++++++++ src/apps/MASSVEC3DPA-Hip.cpp | 51 ++++++++++++++++++------- src/apps/MASSVEC3DPA.hpp | 2 +- 3 files changed, 73 insertions(+), 14 deletions(-) create mode 100644 scripts/fem_benchmarking/run_kernels.sh diff --git a/scripts/fem_benchmarking/run_kernels.sh b/scripts/fem_benchmarking/run_kernels.sh new file mode 100644 index 000000000..0ae23631a --- /dev/null +++ 
b/scripts/fem_benchmarking/run_kernels.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +## Run all benchmark kernels for GPU, non-lambda variants only +## on 4 MPI ranks and dump the results in the specified directory. +rm -rf RPBenchmarkTestMPI +flux alloc -xN1 -t 20 bash -c ' + +OUTDIR=RPBenchmarkTestMPI + +# Collection of problem size factors between 0.5 and 6 +FACTORS=(0.5 1.0 2.0 3.0 4.0 5.0 6.0) + +#FACTORS=(4.0) + +# List of kernels to run +KERNELS=("MASS3DEA" "DIFFUSION3DPA" "MASS3DPA_ATOMIC" "MASSVEC3DPA" "CONVECTION3DPA" "MASS3DPA") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + + for factor in "${FACTORS[@]}"; do + echo " Running with sizefact = $factor" + flux run -xN1 -n4 ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 1 \ + --npasses-combiners Average Minimum Maximum \ + --outdir ${OUTDIR} \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --sizefact "$factor" \ + --warmup-perfrun-same \ + -ev Seq Lambda + done +done +' diff --git a/src/apps/MASSVEC3DPA-Hip.cpp b/src/apps/MASSVEC3DPA-Hip.cpp index 2440b44a2..b0d473e52 100644 --- a/src/apps/MASSVEC3DPA-Hip.cpp +++ b/src/apps/MASSVEC3DPA-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) Lawrence Livermore National Security, LLC and other +// Copyright (c) Lawrence Livermore National Security, LLC and other // RAJA Project Developers. See top-level LICENSE and COPYRIGHT // files for dates and other details. No copyright assignment is required // to contribute to RAJA Performance Suite. @@ -251,7 +251,7 @@ void MassVec3DPA_DIRECT(const Real_ptr B, } // (c) dimension loop } -template +template void MASSVEC3DPA::runRAJAImpl(RESOURCE &res) { @@ -269,7 +269,7 @@ void MASSVEC3DPA::runRAJAImpl(RESOURCE &res) res, RAJA::LaunchParams(RAJA::Teams(NE), RAJA::Threads(mvpa::Q1D, mvpa::Q1D, mvpa::Q1D)), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + [=] RAJA_HOST_DEVICE(CONTEXT ctx) { RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](Index_type e) { @@ -520,6 +520,27 @@ void MASSVEC3DPA::runHipVariantImpl(VariantID vid) if constexpr (tune_idx == 1) { + using inner_x = RAJA::LoopPolicy; + + using inner_y = RAJA::LoopPolicy; + + using inner_z = RAJA::LoopPolicy; + + //LaunchContextDim3Policy + using launch_context = RAJA::LaunchContextT; + + startTimer(); + // Loop counter increment uses macro to quiet C++20 compiler warning + for (RepIndex_type irep = 0; irep < run_reps; RP_REPCOUNTINC(irep)) { + + runRAJAImpl(res); + + } // loop over kernel reps + stopTimer(); + } + + if constexpr (tune_idx == 2) { + using inner_x = RAJA::LoopPolicy>; using inner_y = RAJA::LoopPolicy>; @@ -530,13 +551,13 @@ void MASSVEC3DPA::runHipVariantImpl(VariantID vid) // Loop counter increment uses macro to quiet C++20 compiler warning for (RepIndex_type irep = 0; irep < run_reps; RP_REPCOUNTINC(irep)) { - runRAJAImpl(res); + runRAJAImpl(res); } // loop over kernel reps stopTimer(); } - if constexpr (tune_idx == 2) { + if constexpr (tune_idx == 3) { using inner_x = RAJA::LoopPolicy; @@ -579,14 +600,14 @@ void MASSVEC3DPA::defineHipVariantTunings() if (vid == Base_HIP) { - addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( - vid, "BLOCKDIM_LOOP_INC_"+std::to_string(block_size)); + addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( + vid, "BLOCKDIM_LOOP_INC_"+std::to_string(block_size)); - addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( - vid, "ARGUMENT_LOOP_INC_"+std::to_string(block_size)); + //addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( + //vid, 
"ARGUMENT_LOOP_INC_"+std::to_string(block_size)); - addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( - vid, "COMPILE_LOOP_INC_"+std::to_string(block_size)); + // addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( + //vid, "COMPILE_LOOP_INC_"+std::to_string(block_size)); addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( vid, "DIRECT_"+std::to_string(block_size)); @@ -598,10 +619,14 @@ void MASSVEC3DPA::defineHipVariantTunings() addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( vid, "BLOCKDIM_LOOP_INC_"+std::to_string(block_size)); + addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( - vid, "COMPILE_LOOP_INC_"+std::to_string(block_size)); + vid, "CACHE_BLOCK_DIM_"+std::to_string(block_size)); - addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( + // addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( + //vid, "COMPILE_LOOP_INC_"+std::to_string(block_size)); + + addVariantTuning<&MASSVEC3DPA::runHipVariantImpl>( vid, "DIRECT_"+std::to_string(block_size)); } diff --git a/src/apps/MASSVEC3DPA.hpp b/src/apps/MASSVEC3DPA.hpp index 52f6018f7..158ac05f4 100644 --- a/src/apps/MASSVEC3DPA.hpp +++ b/src/apps/MASSVEC3DPA.hpp @@ -276,7 +276,7 @@ class MASSVEC3DPA : public KernelBase { template void runSyclVariantImpl(VariantID vid); template + typename CONTEXT=RAJA::LaunchContext, typename RESOURCE> void runRAJAImpl(RESOURCE &res); private: