fix for "two devices" issue due to RoPE changes (#630)

casper-hansen · Nov 14, 2024 · 12c91b7 · 12c91b7
1 parent 7954766
commit 12c91b7
Show file tree

Hide file tree

Showing 6 changed files with 6 additions and 0 deletions.
diff --git a/awq/models/cohere.py b/awq/models/cohere.py
@@ -29,6 +29,7 @@ def get_act_for_scaling(module: OldCohereDecoderLayer):
 
     @staticmethod
     def move_embed(model: OldCohereForCausalLM, device: str):
+        model.model.rotary_emb = model.model.rotary_emb.to(device)
         model.model.embed_tokens = model.model.embed_tokens.to(device)
 
     @staticmethod

diff --git a/awq/models/gpt_neox.py b/awq/models/gpt_neox.py
@@ -24,6 +24,7 @@ def get_act_for_scaling(module: GPTNeoXLayer):
 
     @staticmethod
     def move_embed(model: GPTNeoXForCausalLM, device: str):
+        model.gpt_neox.rotary_emb = model.gpt_neox.rotary_emb.to(device)
         model.gpt_neox.embed_in = model.gpt_neox.embed_in.to(device)
 
     @staticmethod

diff --git a/awq/models/llama.py b/awq/models/llama.py
@@ -30,6 +30,7 @@ def get_act_for_scaling(module: OldLlamaDecoderLayer):
 
     @staticmethod
     def move_embed(model: OldLlamaForCausalLM, device: str):
+        model.model.rotary_emb = model.model.rotary_emb.to(device)
         model.model.embed_tokens = model.model.embed_tokens.to(device)
 
     @staticmethod

diff --git a/awq/models/qwen2.py b/awq/models/qwen2.py
@@ -30,6 +30,7 @@ def get_act_for_scaling(module: OldQwen2DecoderLayer):
 
     @staticmethod
     def move_embed(model: OldQwen2ForCausalLM, device: str):
+        model.model.rotary_emb = model.model.rotary_emb.to(device)
         model.model.embed_tokens = model.model.embed_tokens.to(device)
 
     @staticmethod

diff --git a/awq/models/stablelm.py b/awq/models/stablelm.py
@@ -30,6 +30,7 @@ def get_act_for_scaling(module: OldStableLmForCausalLM):
 
     @staticmethod
     def move_embed(model: OldStableLmForCausalLM, device: str):
+        model.model.rotary_emb = model.model.rotary_emb.to(device)
         model.model.embed_tokens = model.model.embed_tokens.to(device)
 
     @staticmethod

diff --git a/awq/models/starcoder2.py b/awq/models/starcoder2.py
@@ -36,6 +36,7 @@ def get_act_for_scaling(module: OldStarcoder2DecoderLayer):
 
     @staticmethod
     def move_embed(model: OldStarcoder2ForCausalLM, device):
+        model.model.rotary_emb = model.model.rotary_emb.to(device)
         model.model.embed_tokens = model.model.embed_tokens.to(device)
 
     @staticmethod