
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Mar 20, 2024
1 parent 9330f57 commit f9161a4
Showing 1 changed file with 103 additions and 59 deletions.
162 changes: 103 additions & 59 deletions thunder/benchmarks/test_benchmark_litgpt.py
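Judging from the diff below, the hooks applied Black-style formatting (double quotes, wrapped long lines, normalized blank lines) together with pyupgrade-style rewrites (plain super() calls, f-strings instead of str.format, open() without the redundant 'r' mode); fixes of this kind can usually be reproduced locally with "pre-commit run --all-files".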
@@ -1,4 +1,4 @@
-'''
+"""
 Script to run all lit-GPT models available as a parametrized test using abseil's unittest framework.
 Runs a parametrized product over all configs specified, compiler options, distributed modes etc.
 Uses environment variables to modify default behavior
@@ -8,7 +8,7 @@
         between each test.
     BENCHMARK_OUT_FORMAT - use this env variable to control the format in which the results are presented.
         Uses 'xlsx' by default. More format support to come soon.
-'''
+"""
 
 import torch
 from absl.testing import parameterized
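As an aside, here is a minimal invocation sketch for the environment variables described in this docstring; the chosen values and the relative path are illustrative assumptions, not taken from the commit:

# Hypothetical way to drive the parametrized benchmark test (values are examples only).
import os
import subprocess

env = dict(os.environ)
env["BENCHMARK_OUT_FORMAT"] = "print"  # 'xlsx' is the default per the docstring above
env["MID_BENCHMARK_OUT"] = "1"  # report intermediate results between tests
subprocess.run(
    ["python", "thunder/benchmarks/test_benchmark_litgpt.py"],
    env=env,
    check=True,
)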
@@ -19,21 +19,18 @@
 import pandas as pd
 from datetime import datetime
 
 
 class Runner:
-    '''
+    """
     Benchmark Runner class to
     a) Launch the training benchmarking run,
     b) Store results from all tests,
     c) Compile results as xlsx file
-    '''
-
-    def __init__(self,
-                 benchmark_file,
-                 mid_benchmark_out,
-                 output_format):
+    """
+
+    def __init__(self, benchmark_file, mid_benchmark_out, output_format):
         self.dataframe_data = []
-        self.json_file_path = '/tmp/benchmark_litgpt_data.json'
+        self.json_file_path = "/tmp/benchmark_litgpt_data.json"
         self.benchmark_file = benchmark_file
         self.mid_benchmark_out = mid_benchmark_out
         self.output_format = output_format
@@ -43,36 +40,61 @@ def __enter__(self):
 
     def add_to_dataframe(self):
         if self.perf_metrics_dict:
-            if 'tokens_per_sec_per_gpu' not in self.perf_metrics_dict.keys(): #In case of OutofMemory error, this is already marked 'OOM'
-                self.perf_metrics_dict['tokens_per_sec_per_gpu'] = self.perf_metrics_dict['tokens_per_sec'] / self.perf_metrics_dict['Num GPUS']
+            if (
+                "tokens_per_sec_per_gpu" not in self.perf_metrics_dict.keys()
+            ):  # In case of OutofMemory error, this is already marked 'OOM'
+                self.perf_metrics_dict["tokens_per_sec_per_gpu"] = (
+                    self.perf_metrics_dict["tokens_per_sec"] / self.perf_metrics_dict["Num GPUS"]
+                )
             self.dataframe_data.append(self.perf_metrics_dict)
 
     def complete_dataframe(self, is_teardown):
-        #Called when tearing down the parametrized test
-        #This generates a summarized dataframe for each perf metric and saves as a xlsx file
+        # Called when tearing down the parametrized test
+        # This generates a summarized dataframe for each perf metric and saves as a xlsx file
         df = pd.DataFrame(self.dataframe_data)
-        df['Sharding Size'] = df['Sharding Size'].fillna('none') #Convert None Type to string so that pivot table can group.
-        index_list = ['model_name', 'Num GPUS', 'Seq Len', 'Micro BS', 'Global BS', 'GA', 'Distributed Mode', 'Sharding Size']
+        df["Sharding Size"] = df["Sharding Size"].fillna(
+            "none"
+        )  # Convert None Type to string so that pivot table can group.
+        index_list = [
+            "model_name",
+            "Num GPUS",
+            "Seq Len",
+            "Micro BS",
+            "Global BS",
+            "GA",
+            "Distributed Mode",
+            "Sharding Size",
+        ]
 
-        self.iter_time_df = df.pivot_table(index=index_list, columns='compiler', values='average_iter_time', aggfunc='first').reset_index()
-        self.tokens_per_sec_df = df.pivot_table(index=index_list, columns='compiler', values='tokens_per_sec', aggfunc='first').reset_index()
-        self.tokens_per_sec_per_gpu_df = df.pivot_table(index=index_list, columns='compiler', values='tokens_per_sec_per_gpu', aggfunc='first').reset_index()
-        self.memory_used_GB_df = df.pivot_table(index=index_list, columns='compiler', values='memory_used_GB', aggfunc='first').reset_index()
+        self.iter_time_df = df.pivot_table(
+            index=index_list, columns="compiler", values="average_iter_time", aggfunc="first"
+        ).reset_index()
+        self.tokens_per_sec_df = df.pivot_table(
+            index=index_list, columns="compiler", values="tokens_per_sec", aggfunc="first"
+        ).reset_index()
+        self.tokens_per_sec_per_gpu_df = df.pivot_table(
+            index=index_list, columns="compiler", values="tokens_per_sec_per_gpu", aggfunc="first"
+        ).reset_index()
+        self.memory_used_GB_df = df.pivot_table(
+            index=index_list, columns="compiler", values="memory_used_GB", aggfunc="first"
+        ).reset_index()
 
-        if self.output_format not in ('none', 'print'):
-            output_ext = {'xlsx': '.xlsx', }[self.output_format]
+        if self.output_format not in ("none", "print"):
+            output_ext = {
+                "xlsx": ".xlsx",
+            }[self.output_format]
             if not is_teardown:
-                filename = 'mid_output_parameterized_results' + str(output_ext)
+                filename = "mid_output_parameterized_results" + str(output_ext)
             else:
-                current_time = datetime.now().strftime('%Y-%m-%d_%H-%M')
+                current_time = datetime.now().strftime("%Y-%m-%d_%H-%M")
                 filename = f"{current_time}_litgpt_benchmark" + str(output_ext)
 
-            with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
-                self.iter_time_df.to_excel(writer, sheet_name='Average Iter Time (ms)')
-                self.tokens_per_sec_df.to_excel(writer, sheet_name='Tokens per sec')
-                self.tokens_per_sec_per_gpu_df.to_excel(writer, sheet_name='Tokens per sec per GPU')
-                self.memory_used_GB_df.to_excel(writer, sheet_name='Memory allocated GB')
-        elif self.output_format == 'print':
+            with pd.ExcelWriter(filename, engine="xlsxwriter") as writer:
+                self.iter_time_df.to_excel(writer, sheet_name="Average Iter Time (ms)")
+                self.tokens_per_sec_df.to_excel(writer, sheet_name="Tokens per sec")
+                self.tokens_per_sec_per_gpu_df.to_excel(writer, sheet_name="Tokens per sec per GPU")
+                self.memory_used_GB_df.to_excel(writer, sheet_name="Memory allocated GB")
+        elif self.output_format == "print":
             print("\nAVERAGE ITERATION TIME (ms)")
             print(self.iter_time_df)
             print("\nTHROUGHPUT (tokens/s)")
@@ -87,12 +109,24 @@ def run_benchmark(self, kwargs):
         command_list = []
         for key, val in kwargs.items():
             command_list.append("--" + str(key) + "=" + str(val))
-        if kwargs['distributed_mode'] != 'none':
+        if kwargs["distributed_mode"] != "none":
             nproc_per_node = torch.cuda.device_count()
-            subprocess_cmd = ["torchrun", f"--nproc_per_node={nproc_per_node}", "--nnodes=1", "{}".format(self.benchmark_file), "--return_metrics_as_json=True", "--json_path={}".format(self.json_file_path)]
+            subprocess_cmd = [
+                "torchrun",
+                f"--nproc_per_node={nproc_per_node}",
+                "--nnodes=1",
+                f"{self.benchmark_file}",
+                "--return_metrics_as_json=True",
+                f"--json_path={self.json_file_path}",
+            ]
             subprocess_cmd.extend(command_list)
         else:
-            subprocess_cmd = ["python", "{}".format(self.benchmark_file), "--return_metrics_as_json=True", "--json_path={}".format(self.json_file_path)]
+            subprocess_cmd = [
+                "python",
+                f"{self.benchmark_file}",
+                "--return_metrics_as_json=True",
+                f"--json_path={self.json_file_path}",
+            ]
             subprocess_cmd.extend(command_list)
 
         print(f'Running {" ".join(subprocess_cmd)!r}')
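For example, with distributed_mode="fsdp", shard_mode="zero2", model_name="Llama-2-7b-hf" and a node where torch.cuda.device_count() returns 8 (an assumed count), the assembled command would look roughly like: torchrun --nproc_per_node=8 --nnodes=1 <benchmark_file> --return_metrics_as_json=True --json_path=/tmp/benchmark_litgpt_data.json --distributed_mode=fsdp --shard_mode=zero2 --model_name=Llama-2-7b-hf ...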
@@ -102,18 +136,18 @@ def run_benchmark(self, kwargs):
         print(proc_output.stderr)
         proc_output.check_returncode()
 
-        with open(self.json_file_path, 'r') as file:
+        with open(self.json_file_path) as file:
             self.perf_metrics_dict = json.load(file)
-        os.remove(self.json_file_path) #cleanup after test finishes
-
-        if self.perf_metrics_dict['average_iter_time'] is None:
-            if 'CUDA out of memory' in proc_output.stdout:
-                self.perf_metrics_dict['average_iter_time'] = 'OOM'
-                self.perf_metrics_dict['model_flops'] = 'OOM'
-                self.perf_metrics_dict['model_flop_per_sec'] = 'OOM'
-                self.perf_metrics_dict['tokens_per_sec'] = 'OOM'
-                self.perf_metrics_dict['tokens_per_sec_per_gpu'] = 'OOM'
-                self.perf_metrics_dict['memory_used_GB'] = 'OOM'
+        os.remove(self.json_file_path)  # cleanup after test finishes
+
+        if self.perf_metrics_dict["average_iter_time"] is None:
+            if "CUDA out of memory" in proc_output.stdout:
+                self.perf_metrics_dict["average_iter_time"] = "OOM"
+                self.perf_metrics_dict["model_flops"] = "OOM"
+                self.perf_metrics_dict["model_flop_per_sec"] = "OOM"
+                self.perf_metrics_dict["tokens_per_sec"] = "OOM"
+                self.perf_metrics_dict["tokens_per_sec_per_gpu"] = "OOM"
+                self.perf_metrics_dict["memory_used_GB"] = "OOM"
                 pass_str = "TestCase did not finish reporting metrics due to CUDA out of memory error. Reporting OOM and triggering test success."
                 return True, pass_str
             else:
@@ -126,26 +160,28 @@ def run_benchmark(self, kwargs):
 
 
 class Test(parameterized.TestCase):
-
     @classmethod
     def setUpClass(cls):
-        super(Test, cls).setUpClass()
+        super().setUpClass()
 
         def get_installed_thunder_path():
             import thunder
+
             thunder_init = thunder.__file__
-            thunder_benchmark_file = str(thunder_init).replace('__init__.py', 'benchmarks/benchmark_litgpt.py')
+            thunder_benchmark_file = str(thunder_init).replace("__init__.py", "benchmarks/benchmark_litgpt.py")
             return thunder_benchmark_file
 
         benchmark_file = os.getenv("BENCHMARK_FILE", get_installed_thunder_path())
        mid_benchmark_out = bool(os.getenv("MID_BENCHMARK_OUT", 0))
-        output_format = str(os.getenv("BENCHMARK_OUT_FORMAT", "xlsx")) # Can take none, print, xlsx as of 03/12
-        cls.runner = Runner(benchmark_file=benchmark_file, mid_benchmark_out=mid_benchmark_out, output_format=output_format)
+        output_format = str(os.getenv("BENCHMARK_OUT_FORMAT", "xlsx"))  # Can take none, print, xlsx as of 03/12
+        cls.runner = Runner(
+            benchmark_file=benchmark_file, mid_benchmark_out=mid_benchmark_out, output_format=output_format
+        )
 
     @classmethod
     def tearDownClass(cls):
         cls.runner.complete_dataframe(is_teardown=True)
-        super(Test, cls).tearDownClass()
+        super().tearDownClass()
 
     # @parameterized.product(
     #     (dict(distributed_mode = "fsdp", shard_mode = "zero2"),
@@ -186,16 +222,23 @@ def tearDownClass(cls):
     # )
 
     @parameterized.product(
-        distributed_mode = ("fsdp", ),
-        shard_mode = ("zero2", ),
-        model_name = ("Llama-2-7b-hf", ),
-        micro_batch_size = (1, 4, ),
-        compile = ("eager", "inductor", "thunder", "thunder_inductor",)
+        distributed_mode=("fsdp",),
+        shard_mode=("zero2",),
+        model_name=("Llama-2-7b-hf",),
+        micro_batch_size=(
+            1,
+            4,
+        ),
+        compile=(
+            "eager",
+            "inductor",
+            "thunder",
+            "thunder_inductor",
+        ),
     )
-
     def test(self, **kwargs):
-        kwargs['nsys_enabled'] = False
-        kwargs['dynamic'] = False
+        kwargs["nsys_enabled"] = False
+        kwargs["dynamic"] = False
         self.__file__ = __file__
 
         try:
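Note that parameterized.product takes the cross product of these option tuples, so this block expands to 1 x 1 x 1 x 2 x 4 = 8 test cases, one per (micro_batch_size, compile) combination.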
@@ -212,5 +255,6 @@ def test(self, **kwargs):
         else:
             self.fail(run_msg)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     absltest.main()