Align the model's max_seq_len with the data sequence length (#26)

samsja · web-flow · commit 80d050aa91b4 · 2024-09-30T17:41:22.000-07:00
diff --git a/src/zeroband/models/llama/__init__.py b/src/zeroband/models/llama/__init__.py
@@ -61,7 +61,7 @@
 }
 
 
-def get_model(name_model: str, type_model: str, vocab_size: int) -> tuple[Transformer, ModelArgs]:
+def get_model(name_model: str, type_model: str, vocab_size: int, seq_length: int) -> tuple[Transformer, ModelArgs]:
     """get the transformer model"""
 
     if type_model == "llama2":
@@ -72,4 +72,5 @@ def get_model(name_model: str, type_model: str, vocab_size: int) -> tuple[Transf
         raise ValueError(f"Model type {type_model} not supported")
 
     config.vocab_size = vocab_size
+    config.max_seq_len = seq_length
     return Transformer(config), config
diff --git a/src/zeroband/train.py b/src/zeroband/train.py
@@ -116,6 +116,7 @@ def train(config: Config):
         vocab_size=tokenizer.vocab_size
         if config.name_model != "debugmodel" or not config.data.fake
         else TEST_VOCAB_SIZE,
+        seq_length=config.data.seq_length,
     )
 
     if config.train.log_model_hash:

Original file line number	Diff line number	Diff line change
`@@ -116,6 +116,7 @@ def train(config: Config):`
`116`	`116`	`vocab_size=tokenizer.vocab_size`
`117`	`117`	`if config.name_model != "debugmodel" or not config.data.fake`
`118`	`118`	`else TEST_VOCAB_SIZE,`
	`119`	`+ seq_length=config.data.seq_length,`
`119`	`120`	`)`
`120`	`121`
`121`	`122`	`if config.train.log_model_hash:`