From 7f0d1e83325d02119486420972ddc611e1f3de43 Mon Sep 17 00:00:00 2001 From: Ean Garvey Date: Fri, 20 Sep 2024 15:02:42 -0500 Subject: [PATCH] Add benchmarking minimally, comment out a few more models --- .../custom_models/torchbench/README.md | 6 +++ .../custom_models/torchbench/cmd_opts.py | 10 +++++ .../custom_models/torchbench/export.py | 44 +++++++++++++++---- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/models/turbine_models/custom_models/torchbench/README.md b/models/turbine_models/custom_models/torchbench/README.md index 5de88d37e..ae351f1bb 100644 --- a/models/turbine_models/custom_models/torchbench/README.md +++ b/models/turbine_models/custom_models/torchbench/README.md @@ -1,5 +1,11 @@ # SHARK torchbench exports and benchmarks +## Overview + +This directory serves as a place for scripts and utilities to run a suite of benchmarked inference tasks, showing functionality and performance parity between SHARK/IREE and native torch.compile workflows. It is currently under development and benchmark numbers should not be treated as the best possible result with the current state of IREE compiler optimizations. + +Eventually, we want this process to be a plug-in to the upstream torchbench process, and this will be accomplished by exposing the IREE methodology shown here as a compile/runtime backend for the torch benchmark classes. For now, it is set up for developers as a way to get preliminary results and achieve blanket functionality for the models listed in export.py. 
+ ### Setup - pip install torch+rocm packages: diff --git a/models/turbine_models/custom_models/torchbench/cmd_opts.py b/models/turbine_models/custom_models/torchbench/cmd_opts.py index c217bcf4b..b80f6bb7b 100644 --- a/models/turbine_models/custom_models/torchbench/cmd_opts.py +++ b/models/turbine_models/custom_models/torchbench/cmd_opts.py @@ -72,6 +72,16 @@ def is_valid_file(arg): choices=["safetensors", "irpa", "gguf", None], help="Externalizes model weights from the torch dialect IR and its successors", ) +p.add_argument( + "--run_benchmark", + type=lambda v: v.strip().lower() in ("1", "true", "yes", "on"), + default=True, +) +p.add_argument( + "--output_csv", + type=str, + default="./benchmark_results.csv", +) ############################################################################## # Modeling and Export Options diff --git a/models/turbine_models/custom_models/torchbench/export.py b/models/turbine_models/custom_models/torchbench/export.py index 1e7ac0a24..729f928c4 100644 --- a/models/turbine_models/custom_models/torchbench/export.py +++ b/models/turbine_models/custom_models/torchbench/export.py @@ -9,6 +9,7 @@ import gc from iree.compiler.ir import Context +from iree import runtime as ireert import numpy as np from shark_turbine.aot import * from shark_turbine.dynamo.passes import ( @@ -21,10 +22,12 @@ from safetensors import safe_open import argparse from turbine_models.turbine_tank import turbine_tank +from turbine_models.model_runner import vmfbRunner from pytorch.benchmarks.dynamo.common import parse_args from pytorch.benchmarks.dynamo.torchbench import TorchBenchmarkRunner, setup_torchbench_cwd +import csv torchbench_models_dict = { # "BERT_pytorch": { # "dim": 128, @@ -84,7 +87,7 @@ "resnet50": { "dim": 128, }, - "resnet50_32x4d": { + "resnext50_32x4d": { "dim": 128, }, "shufflenet_v2_x1_0": { @@ -93,9 +96,9 @@ "squeezenet1_1": { "dim": 512, }, - "timm_nfnet": { - "dim": 256, - }, + # "timm_nfnet": { + # "dim": 256, + # }, "timm_efficientnet": { "dim": 128, }, @@ -163,9 +166,13 @@ def
export_torchbench_model( model_id, f"_{static_dim}_{precision}", ) + safe_name = os.path.join("generated", safe_name) if decomp_attn: safe_name += "_decomp_attn" + if not os.path.exists("generated"): + os.mkdir("generated") + if input_mlir: vmfb_path = utils.compile_to_vmfb( input_mlir, @@ -179,6 +186,7 @@ def export_torchbench_model( ) return vmfb_path + _, model_name, model, forward_args, _ = get_model_and_inputs(model_id, batch_size, tb_dir, tb_args) if dtype == torch.float16: @@ -188,7 +196,8 @@ def export_torchbench_model( if not isinstance(forward_args, dict): forward_args = [i.type(dtype) for i in forward_args] for idx, i in enumerate(forward_args): - np.save(f"{model_id}_input{idx}", i.clone().detach().cpu()) + np.save( + os.path.join("generated", f"{model_id}_input{idx}"), i.clone().detach().cpu()) else: for idx, i in enumerate(forward_args.values()): np.save(f"{model_id}_input{idx}", i.clone().detach().cpu()) @@ -199,7 +208,8 @@ def export_torchbench_model( if not os.path.exists(external_weights_dir): os.mkdir(external_weights_dir) external_weight_path = os.path.join(external_weights_dir, f"{model_id}_{precision}.irpa") - + else: + external_weight_path = None decomp_list = [torch.ops.aten.reflection_pad2d] if decomp_attn == True: @@ -265,11 +275,26 @@ class CompiledTorchbenchModel(CompiledModule): return_path=not exit_on_vmfb, attn_spec=attn_spec, ) - return vmfb_path + return vmfb_path, external_weight_path, forward_args + +def run_benchmark(device, vmfb_path, weights_path, example_args, model_id, csv_path): + if "rocm" in device: + device = "hip" + device.split("rocm")[-1] + mod_runner = vmfbRunner(device, vmfb_path, weights_path) + inputs = [ireert.asdevicearray(mod_runner.config.device, i) for i in example_args] + start = time.time() + results = mod_runner.ctx.modules.compiled_torchbench_model["main"](*inputs) + latency = time.time() - start + with open(csv_path, "a") as csvfile: + fieldnames = ["model", "latency"] + data = [{"model": model_id, "latency":
latency}] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writerows(data) + def run_main(model_id, args, tb_dir, tb_args): print(f"exporting {model_id}") - mod_str = export_torchbench_model( + mod_str, weights_path, example_args = export_torchbench_model( model_id, tb_dir, tb_args, @@ -293,6 +318,9 @@ def run_main(model_id, args, tb_dir, tb_args): with open(f"{safe_name}.mlir", "w+") as f: f.write(mod_str) print("Saved to", safe_name + ".mlir") + elif args.run_benchmark: + run_benchmark(args.device, mod_str, weights_path, example_args, model_id, args.output_csv) + gc.collect() if __name__ == "__main__":