Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Oct 21, 2024
1 parent 8ed7d8a commit eedd8a6
Showing 1 changed file with 24 additions and 27 deletions.
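
The diff below is a formatting-only change applied by pre-commit.ci. As an illustration only (the repository's actual pre-commit configuration is not shown here, so the exact hook is an assumption), the normalization applied (double quotes, no spaces around keyword "=", short calls joined onto one line) is what a Black-style formatter produces, and can be reproduced locally with Black's Python API:

    # Minimal sketch, assuming the "black" package is installed; the hook that
    # pre-commit.ci actually ran for this repository may differ (ruff-format, for
    # example, applies the same quote and keyword-spacing rules by default).
    import black

    src = "tokenizer.add_special_tokens({'pad_token': '[PAD]'})\n"
    print(black.format_str(src, mode=black.Mode()), end="")
    # -> tokenizer.add_special_tokens({"pad_token": "[PAD]"})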
51 changes: 24 additions & 27 deletions thunder/tests/test_mistral_nemo.py
@@ -4,6 +4,7 @@
 import transformers
 import datasets
 
+
 @thunder.tests.framework.requiresCUDA
 def test_thunderfx_mistral_nemo_small():
     """
@@ -14,9 +15,9 @@ def test_thunderfx_mistral_nemo_small():
     model_id = "mistralai/Mistral-Nemo-Base-2407"
 
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        ignore_mismatched_sizes=True,
+        model_id,
+        torch_dtype=torch.bfloat16,
+        ignore_mismatched_sizes=True,
     )
 
     # Setup a "small" version of NeMo-Mistral that does not require downloading
@@ -27,17 +28,17 @@ def test_thunderfx_mistral_nemo_small():
     # transformers.AutoConfig.from_pretrained(model_id)
     # until they lined up.
     config = transformers.models.mistral.configuration_mistral.MistralConfig(
-        num_hidden_layers = 2,
-        torch_dtype = torch.bfloat16,
-        max_position_embeddings=1024,
-        architectures = ["MistralForCausalLM"],
-        hidden_size = 5120,
-        rms_norm_eps=1e-05,
-        rope_theta=1000000.0,
-        sliding_window = None,
-        vocab_size = 131072,
-        head_dim = 128,
-        _name_or_path = model_id,
+        num_hidden_layers=2,
+        torch_dtype=torch.bfloat16,
+        max_position_embeddings=1024,
+        architectures=["MistralForCausalLM"],
+        hidden_size=5120,
+        rms_norm_eps=1e-05,
+        rope_theta=1000000.0,
+        sliding_window=None,
+        vocab_size=131072,
+        head_dim=128,
+        _name_or_path=model_id,
     )
     model = transformers.AutoModelForCausalLM.from_config(config)
     device = torch.device("cuda")
@@ -47,24 +48,20 @@ def test_thunderfx_mistral_nemo_small():
 
     # Add a padding token to the tokenizer
     if tokenizer.pad_token is None:
-        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         mdl.resize_token_embeddings(len(tokenizer))
 
-    dataset = datasets.load_dataset("tiny_shakespeare", split='train',
-                                    trust_remote_code=False)
+    dataset = datasets.load_dataset("tiny_shakespeare", split="train", trust_remote_code=False)
 
     def tokenize_function(examples):
-        return tokenizer(examples["text"], padding="max_length",
-                         truncation=True, max_length=2)
-    tokenized_dataset = dataset.map(tokenize_function, batched=True,
-                                    remove_columns=["text"])
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=2)
+
+    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
 
     # Convert the dataset to PyTorch format and specify columns to return as tensors
-    tokenized_dataset.set_format(type='torch',
-                                 columns=['input_ids', 'attention_mask'])
+    tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
 
-    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1,
-                                             shuffle=True)
+    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1, shuffle=True)
 
     # Define optimizer and learning rate scheduler
     optimizer = torch.optim.AdamW(mdl.parameters(), lr=5e-5)
@@ -77,8 +74,8 @@ def tokenize_function(examples):
     )
 
     # Move model to GPU if available
-    #device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-    #model.to(device)
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    # model.to(device)
 
     mdl.train()
     for epoch in range(num_epochs):
