
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Mar 20, 2024
1 parent 9330f57 commit f9161a4
Showing 1 changed file with 103 additions and 59 deletions.
162 changes: 103 additions & 59 deletions thunder/benchmarks/test_benchmark_litgpt.py
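Judging from the diff below, the hooks applied Black-style formatting (double quotes, wrapped long lines, normalized blank lines) together with pyupgrade-style rewrites (plain super() calls, f-strings instead of str.format, open() without the redundant 'r' mode); fixes of this kind can usually be reproduced locally with "pre-commit run --all-files".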
@@ -1,4 +1,4 @@
-'''
+"""
 Script to run all lit-GPT models available as a parametrized test using abseil's unittest framework.
 Runs a parametrized product over all configs specified, compiler options, distributed modes etc.
 Uses environment variables to modify default behavior
@@ -8,7 +8,7 @@
         between each test.
     BENCHMARK_OUT_FORMAT - use this env variable to control the format in which the results are presented.
         Uses 'xlsx' by default. More format support to come soon.
-'''
+"""
 
 import torch
 from absl.testing import parameterized
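As an aside, here is a minimal invocation sketch for the environment variables described in this docstring; the chosen values and the relative path are illustrative assumptions, not taken from the commit:

# Hypothetical way to drive the parametrized benchmark test (values are examples only).
import os
import subprocess

env = dict(os.environ)
env["BENCHMARK_OUT_FORMAT"] = "print"  # 'xlsx' is the default per the docstring above
env["MID_BENCHMARK_OUT"] = "1"  # report intermediate results between tests
subprocess.run(
    ["python", "thunder/benchmarks/test_benchmark_litgpt.py"],
    env=env,
    check=True,
)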
@@ -19,21 +19,18 @@
 import pandas as pd
 from datetime import datetime
 
 
 class Runner:
-    '''
+    """
     Benchmark Runner class to
     a) Launch the training benchmarking run,
     b) Store results from all tests,
     c) Compile results as xlsx file
-    '''
-
-    def __init__(self,
-                 benchmark_file,
-                 mid_benchmark_out,
-                 output_format):
+    """
+
+    def __init__(self, benchmark_file, mid_benchmark_out, output_format):
         self.dataframe_data = []
-        self.json_file_path = '/tmp/benchmark_litgpt_data.json'
+        self.json_file_path = "/tmp/benchmark_litgpt_data.json"
         self.benchmark_file = benchmark_file
         self.mid_benchmark_out = mid_benchmark_out
         self.output_format = output_format
@@ -43,36 +40,61 @@ def __enter__(self):
 
     def add_to_dataframe(self):
         if self.perf_metrics_dict:
-            if 'tokens_per_sec_per_gpu' not in self.perf_metrics_dict.keys(): #In case of OutofMemory error, this is already marked 'OOM'
-                self.perf_metrics_dict['tokens_per_sec_per_gpu'] = self.perf_metrics_dict['tokens_per_sec'] / self.perf_metrics_dict['Num GPUS']
+            if (
+                "tokens_per_sec_per_gpu" not in self.perf_metrics_dict.keys()
+            ):  # In case of OutofMemory error, this is already marked 'OOM'
+                self.perf_metrics_dict["tokens_per_sec_per_gpu"] = (
+                    self.perf_metrics_dict["tokens_per_sec"] / self.perf_metrics_dict["Num GPUS"]
+                )
             self.dataframe_data.append(self.perf_metrics_dict)
 
     def complete_dataframe(self, is_teardown):
-        #Called when tearing down the parametrized test
-        #This generates a summarized dataframe for each perf metric and saves as a xlsx file
+        # Called when tearing down the parametrized test
+        # This generates a summarized dataframe for each perf metric and saves as a xlsx file
         df = pd.DataFrame(self.dataframe_data)
-        df['Sharding Size'] = df['Sharding Size'].fillna('none') #Convert None Type to string so that pivot table can group.
-        index_list = ['model_name', 'Num GPUS', 'Seq Len', 'Micro BS', 'Global BS', 'GA', 'Distributed Mode', 'Sharding Size']
+        df["Sharding Size"] = df["Sharding Size"].fillna(
+            "none"
+        )  # Convert None Type to string so that pivot table can group.
+        index_list = [
+            "model_name",
+            "Num GPUS",
+            "Seq Len",
+            "Micro BS",
+            "Global BS",
+            "GA",
+            "Distributed Mode",
+            "Sharding Size",
+        ]
 
-        self.iter_time_df = df.pivot_table(index=index_list, columns='compiler', values='average_iter_time', aggfunc='first').reset_index()
-        self.tokens_per_sec_df = df.pivot_table(index=index_list, columns='compiler', values='tokens_per_sec', aggfunc='first').reset_index()
-        self.tokens_per_sec_per_gpu_df = df.pivot_table(index=index_list, columns='compiler', values='tokens_per_sec_per_gpu', aggfunc='first').reset_index()
-        self.memory_used_GB_df = df.pivot_table(index=index_list, columns='compiler', values='memory_used_GB', aggfunc='first').reset_index()
+        self.iter_time_df = df.pivot_table(
+            index=index_list, columns="compiler", values="average_iter_time", aggfunc="first"
+        ).reset_index()
+        self.tokens_per_sec_df = df.pivot_table(
+            index=index_list, columns="compiler", values="tokens_per_sec", aggfunc="first"
+        ).reset_index()
+        self.tokens_per_sec_per_gpu_df = df.pivot_table(
+            index=index_list, columns="compiler", values="tokens_per_sec_per_gpu", aggfunc="first"
+        ).reset_index()
+        self.memory_used_GB_df = df.pivot_table(
+            index=index_list, columns="compiler", values="memory_used_GB", aggfunc="first"
+        ).reset_index()
 
-        if self.output_format not in ('none', 'print'):
-            output_ext = {'xlsx': '.xlsx', }[self.output_format]
+        if self.output_format not in ("none", "print"):
+            output_ext = {
+                "xlsx": ".xlsx",
+            }[self.output_format]
             if not is_teardown:
-                filename = 'mid_output_parameterized_results' + str(output_ext)
+                filename = "mid_output_parameterized_results" + str(output_ext)
             else:
-                current_time = datetime.now().strftime('%Y-%m-%d_%H-%M')
+                current_time = datetime.now().strftime("%Y-%m-%d_%H-%M")
                 filename = f"{current_time}_litgpt_benchmark" + str(output_ext)
 
-            with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
-                self.iter_time_df.to_excel(writer, sheet_name='Average Iter Time (ms)')
-                self.tokens_per_sec_df.to_excel(writer, sheet_name='Tokens per sec')
-                self.tokens_per_sec_per_gpu_df.to_excel(writer, sheet_name='Tokens per sec per GPU')
-                self.memory_used_GB_df.to_excel(writer, sheet_name='Memory allocated GB')
-        elif self.output_format == 'print':
+            with pd.ExcelWriter(filename, engine="xlsxwriter") as writer:
+                self.iter_time_df.to_excel(writer, sheet_name="Average Iter Time (ms)")
+                self.tokens_per_sec_df.to_excel(writer, sheet_name="Tokens per sec")
+                self.tokens_per_sec_per_gpu_df.to_excel(writer, sheet_name="Tokens per sec per GPU")
+                self.memory_used_GB_df.to_excel(writer, sheet_name="Memory allocated GB")
+        elif self.output_format == "print":
             print("\nAVERAGE ITERATION TIME (ms)")
             print(self.iter_time_df)
             print("\nTHROUGHPUT (tokens/s)")
@@ -87,12 +109,24 @@ def run_benchmark(self, kwargs):
         command_list = []
         for key, val in kwargs.items():
             command_list.append("--" + str(key) + "=" + str(val))
-        if kwargs['distributed_mode'] != 'none':
+        if kwargs["distributed_mode"] != "none":
             nproc_per_node = torch.cuda.device_count()
-            subprocess_cmd = ["torchrun", f"--nproc_per_node={nproc_per_node}", "--nnodes=1", "{}".format(self.benchmark_file), "--return_metrics_as_json=True", "--json_path={}".format(self.json_file_path)]
+            subprocess_cmd = [
+                "torchrun",
+                f"--nproc_per_node={nproc_per_node}",
+                "--nnodes=1",
+                f"{self.benchmark_file}",
+                "--return_metrics_as_json=True",
+                f"--json_path={self.json_file_path}",
+            ]
             subprocess_cmd.extend(command_list)
         else:
-            subprocess_cmd = ["python", "{}".format(self.benchmark_file), "--return_metrics_as_json=True", "--json_path={}".format(self.json_file_path)]
+            subprocess_cmd = [
+                "python",
+                f"{self.benchmark_file}",
+                "--return_metrics_as_json=True",
+                f"--json_path={self.json_file_path}",
+            ]
             subprocess_cmd.extend(command_list)
 
         print(f'Running {" ".join(subprocess_cmd)!r}')
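For example, with distributed_mode="fsdp", shard_mode="zero2", model_name="Llama-2-7b-hf" and a node where torch.cuda.device_count() returns 8 (an assumed count), the assembled command would look roughly like: torchrun --nproc_per_node=8 --nnodes=1 <benchmark_file> --return_metrics_as_json=True --json_path=/tmp/benchmark_litgpt_data.json --distributed_mode=fsdp --shard_mode=zero2 --model_name=Llama-2-7b-hf ...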
@@ -102,18 +136,18 @@ def run_benchmark(self, kwargs):
         print(proc_output.stderr)
         proc_output.check_returncode()
 
-        with open(self.json_file_path, 'r') as file:
+        with open(self.json_file_path) as file:
             self.perf_metrics_dict = json.load(file)
-        os.remove(self.json_file_path) #cleanup after test finishes
-
-        if self.perf_metrics_dict['average_iter_time'] is None:
-            if 'CUDA out of memory' in proc_output.stdout:
-                self.perf_metrics_dict['average_iter_time'] = 'OOM'
-                self.perf_metrics_dict['model_flops'] = 'OOM'
-                self.perf_metrics_dict['model_flop_per_sec'] = 'OOM'
-                self.perf_metrics_dict['tokens_per_sec'] = 'OOM'
-                self.perf_metrics_dict['tokens_per_sec_per_gpu'] = 'OOM'
-                self.perf_metrics_dict['memory_used_GB'] = 'OOM'
+        os.remove(self.json_file_path)  # cleanup after test finishes
+
+        if self.perf_metrics_dict["average_iter_time"] is None:
+            if "CUDA out of memory" in proc_output.stdout:
+                self.perf_metrics_dict["average_iter_time"] = "OOM"
+                self.perf_metrics_dict["model_flops"] = "OOM"
+                self.perf_metrics_dict["model_flop_per_sec"] = "OOM"
+                self.perf_metrics_dict["tokens_per_sec"] = "OOM"
+                self.perf_metrics_dict["tokens_per_sec_per_gpu"] = "OOM"
+                self.perf_metrics_dict["memory_used_GB"] = "OOM"
                 pass_str = "TestCase did not finish reporting metrics due to CUDA out of memory error. Reporting OOM and triggering test success."
                 return True, pass_str
             else:
@@ -126,26 +160,28 @@ def run_benchmark(self, kwargs):
 
 
 class Test(parameterized.TestCase):
-
     @classmethod
     def setUpClass(cls):
-        super(Test, cls).setUpClass()
+        super().setUpClass()
 
         def get_installed_thunder_path():
             import thunder
+
             thunder_init = thunder.__file__
-            thunder_benchmark_file = str(thunder_init).replace('__init__.py', 'benchmarks/benchmark_litgpt.py')
+            thunder_benchmark_file = str(thunder_init).replace("__init__.py", "benchmarks/benchmark_litgpt.py")
             return thunder_benchmark_file
 
         benchmark_file = os.getenv("BENCHMARK_FILE", get_installed_thunder_path())
        mid_benchmark_out = bool(os.getenv("MID_BENCHMARK_OUT", 0))
-        output_format = str(os.getenv("BENCHMARK_OUT_FORMAT", "xlsx")) # Can take none, print, xlsx as of 03/12
-        cls.runner = Runner(benchmark_file=benchmark_file, mid_benchmark_out=mid_benchmark_out, output_format=output_format)
+        output_format = str(os.getenv("BENCHMARK_OUT_FORMAT", "xlsx"))  # Can take none, print, xlsx as of 03/12
+        cls.runner = Runner(
+            benchmark_file=benchmark_file, mid_benchmark_out=mid_benchmark_out, output_format=output_format
+        )
 
     @classmethod
     def tearDownClass(cls):
         cls.runner.complete_dataframe(is_teardown=True)
-        super(Test, cls).tearDownClass()
+        super().tearDownClass()
 
     # @parameterized.product(
     #     (dict(distributed_mode = "fsdp", shard_mode = "zero2"),
@@ -186,16 +222,23 @@ def tearDownClass(cls):
     # )
 
     @parameterized.product(
-        distributed_mode = ("fsdp", ),
-        shard_mode = ("zero2", ),
-        model_name = ("Llama-2-7b-hf", ),
-        micro_batch_size = (1, 4, ),
-        compile = ("eager", "inductor", "thunder", "thunder_inductor",)
+        distributed_mode=("fsdp",),
+        shard_mode=("zero2",),
+        model_name=("Llama-2-7b-hf",),
+        micro_batch_size=(
+            1,
+            4,
+        ),
+        compile=(
+            "eager",
+            "inductor",
+            "thunder",
+            "thunder_inductor",
+        ),
     )
-
     def test(self, **kwargs):
-        kwargs['nsys_enabled'] = False
-        kwargs['dynamic'] = False
+        kwargs["nsys_enabled"] = False
+        kwargs["dynamic"] = False
         self.__file__ = __file__
 
         try:
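Note that parameterized.product takes the cross product of these option tuples, so this block expands to 1 x 1 x 1 x 2 x 4 = 8 test cases, one per (micro_batch_size, compile) combination.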
@@ -212,5 +255,6 @@ def test(self, **kwargs):
         else:
             self.fail(run_msg)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     absltest.main()