Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Oct 21, 2024
1 parent 8ed7d8a commit eedd8a6
Showing 1 changed file with 24 additions and 27 deletions.
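
The diff below is a formatting-only change applied by pre-commit.ci. As an illustration only (the repository's actual pre-commit configuration is not shown here, so the exact hook is an assumption), the normalization applied (double quotes, no spaces around keyword "=", short calls joined onto one line) is what a Black-style formatter produces, and can be reproduced locally with Black's Python API:

    # Minimal sketch, assuming the "black" package is installed; the hook that
    # pre-commit.ci actually ran for this repository may differ (ruff-format, for
    # example, applies the same quote and keyword-spacing rules by default).
    import black

    src = "tokenizer.add_special_tokens({'pad_token': '[PAD]'})\n"
    print(black.format_str(src, mode=black.Mode()), end="")
    # -> tokenizer.add_special_tokens({"pad_token": "[PAD]"})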
51 changes: 24 additions & 27 deletions thunder/tests/test_mistral_nemo.py
@@ -4,6 +4,7 @@
 import transformers
 import datasets
 
+
 @thunder.tests.framework.requiresCUDA
 def test_thunderfx_mistral_nemo_small():
     """
@@ -14,9 +15,9 @@ def test_thunderfx_mistral_nemo_small():
     model_id = "mistralai/Mistral-Nemo-Base-2407"
 
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        ignore_mismatched_sizes=True,
+        model_id,
+        torch_dtype=torch.bfloat16,
+        ignore_mismatched_sizes=True,
     )
 
     # Setup a "small" version of NeMo-Mistral that does not require downloading
@@ -27,17 +28,17 @@ def test_thunderfx_mistral_nemo_small():
     # transformers.AutoConfig.from_pretrained(model_id)
     # until they lined up.
     config = transformers.models.mistral.configuration_mistral.MistralConfig(
-        num_hidden_layers = 2,
-        torch_dtype = torch.bfloat16,
-        max_position_embeddings=1024,
-        architectures = ["MistralForCausalLM"],
-        hidden_size = 5120,
-        rms_norm_eps=1e-05,
-        rope_theta=1000000.0,
-        sliding_window = None,
-        vocab_size = 131072,
-        head_dim = 128,
-        _name_or_path = model_id,
+        num_hidden_layers=2,
+        torch_dtype=torch.bfloat16,
+        max_position_embeddings=1024,
+        architectures=["MistralForCausalLM"],
+        hidden_size=5120,
+        rms_norm_eps=1e-05,
+        rope_theta=1000000.0,
+        sliding_window=None,
+        vocab_size=131072,
+        head_dim=128,
+        _name_or_path=model_id,
     )
     model = transformers.AutoModelForCausalLM.from_config(config)
     device = torch.device("cuda")
@@ -47,24 +48,20 @@ def test_thunderfx_mistral_nemo_small():
 
     # Add a padding token to the tokenizer
     if tokenizer.pad_token is None:
-        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         mdl.resize_token_embeddings(len(tokenizer))
 
-    dataset = datasets.load_dataset("tiny_shakespeare", split='train',
-                                    trust_remote_code=False)
+    dataset = datasets.load_dataset("tiny_shakespeare", split="train", trust_remote_code=False)
 
     def tokenize_function(examples):
-        return tokenizer(examples["text"], padding="max_length",
-                         truncation=True, max_length=2)
-    tokenized_dataset = dataset.map(tokenize_function, batched=True,
-                                    remove_columns=["text"])
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=2)
+
+    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
 
     # Convert the dataset to PyTorch format and specify columns to return as tensors
-    tokenized_dataset.set_format(type='torch',
-                                 columns=['input_ids', 'attention_mask'])
+    tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
 
-    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1,
-                                             shuffle=True)
+    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=1, shuffle=True)
 
     # Define optimizer and learning rate scheduler
     optimizer = torch.optim.AdamW(mdl.parameters(), lr=5e-5)
@@ -77,8 +74,8 @@ def tokenize_function(examples):
     )
 
     # Move model to GPU if available
-    #device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-    #model.to(device)
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    # model.to(device)
 
     mdl.train()
     for epoch in range(num_epochs):
