From eedd8a6ad87ee683bc1c7fd5000d3f32f86fb1c1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Oct 2024 23:02:33 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 thunder/tests/test_mistral_nemo.py | 51 ++++++++++++++----------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/thunder/tests/test_mistral_nemo.py b/thunder/tests/test_mistral_nemo.py
index 84e1a2779e..c00d7fae64 100644
--- a/thunder/tests/test_mistral_nemo.py
+++ b/thunder/tests/test_mistral_nemo.py
@@ -4,6 +4,7 @@
 import transformers
 import datasets
 
+
 @thunder.tests.framework.requiresCUDA
 def test_thunderfx_mistral_nemo_small():
     """
@@ -14,9 +15,9 @@ def test_thunderfx_mistral_nemo_small():
     model_id = "mistralai/Mistral-Nemo-Base-2407"
 
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        ignore_mismatched_sizes=True,
+        model_id,
+        torch_dtype=torch.bfloat16,
+        ignore_mismatched_sizes=True,
     )
 
     # Setup a "small" version of NeMo-Mistral that does not require downloading
@@ -27,17 +28,17 @@ def test_thunderfx_mistral_nemo_small():
     # transformers.AutoConfig.from_pretrained(model_id)
     # until they lined up.
     config = transformers.models.mistral.configuration_mistral.MistralConfig(
-        num_hidden_layers = 2,
-        torch_dtype = torch.bfloat16,
-        max_position_embeddings=1024,
-        architectures = ["MistralForCausalLM"],
-        hidden_size = 5120,
-        rms_norm_eps=1e-05,
-        rope_theta=1000000.0,
-        sliding_window = None,
-        vocab_size = 131072,
-        head_dim = 128,
-        _name_or_path = model_id,
+        num_hidden_layers=2,
+        torch_dtype=torch.bfloat16,
+        max_position_embeddings=1024,
+        architectures=["MistralForCausalLM"],
+        hidden_size=5120,
+        rms_norm_eps=1e-05,
+        rope_theta=1000000.0,
+        sliding_window=None,
+        vocab_size=131072,
+        head_dim=128,
+        _name_or_path=model_id,
     )
     model = transformers.AutoModelForCausalLM.from_config(config)
     device = torch.device("cuda")
@@ -47,24 +48,20 @@ def test_thunderfx_mistral_nemo_small():
 
     # Add a padding token to the tokenizer
     if tokenizer.pad_token is None:
-        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         mdl.resize_token_embeddings(len(tokenizer))
 
-    dataset = datasets.load_dataset("tiny_shakespeare", split='train',
-                                    trust_remote_code=False)
+    dataset = datasets.load_dataset("tiny_shakespeare", split="train", trust_remote_code=False)
 
     def tokenize_function(examples):
-        return tokenizer(examples["text"], padding="max_length",
-                         truncation=True, max_length=2)
-    tokenized_dataset = dataset.map(tokenize_function, batched=True,
-                                    remove_columns=["text"])
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=2)
+
+    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
 
     # Convert the dataset to PyTorch format and specify columns to return as tensors
-    tokenized_dataset.set_format(type='torch',
-                                 columns=['input_ids', 'attention_mask'])
+    tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
 
-    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1,
-                                             shuffle=True)
+    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1, shuffle=True)
 
     # Define optimizer and learning rate scheduler
     optimizer = torch.optim.AdamW(mdl.parameters(), lr=5e-5)
@@ -77,8 +74,8 @@ def tokenize_function(examples):
     )
 
     # Move model to GPU if available
-    #device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-    #model.to(device)
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    # model.to(device)
     mdl.train()
     for epoch in range(num_epochs):