From 0187ac1646b791cf7734a36f9ccdccce8ba5b82a Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Thu, 14 Nov 2024 15:05:11 +0800
Subject: [PATCH] Add support for Phi-3-vision series model (#596)

Co-authored-by: Casper
---
 awq/models/__init__.py |  1 +
 awq/models/auto.py     |  1 +
 awq/models/base.py     |  1 +
 awq/models/phi3_v.py   | 72 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+)
 create mode 100644 awq/models/phi3_v.py

diff --git a/awq/models/__init__.py b/awq/models/__init__.py
index 9b3a4f27..7f6ff55a 100644
--- a/awq/models/__init__.py
+++ b/awq/models/__init__.py
@@ -20,6 +20,7 @@
 from .starcoder2 import Starcoder2AWQForCausalLM
 from .llava_next import LlavaNextAWQForCausalLM
 from .phi3 import Phi3AWQForCausalLM
+from .phi3_v import Phi3VAWQForCausalLM
 from .cohere import CohereAWQForCausalLM
 from .deepseek_v2 import DeepseekV2AWQForCausalLM
 from .minicpm import MiniCPMAWQForCausalLM
diff --git a/awq/models/auto.py b/awq/models/auto.py
index 495722ab..df67844a 100644
--- a/awq/models/auto.py
+++ b/awq/models/auto.py
@@ -30,6 +30,7 @@
     "starcoder2": Starcoder2AWQForCausalLM,
     "llava_next": LlavaNextAWQForCausalLM,
     "phi3": Phi3AWQForCausalLM,
+    "phi3_v": Phi3VAWQForCausalLM,
     "cohere": CohereAWQForCausalLM,
     "deepseek_v2": DeepseekV2AWQForCausalLM,
     "minicpm": MiniCPMAWQForCausalLM,
diff --git a/awq/models/base.py b/awq/models/base.py
index abfb9b38..71f45d1d 100644
--- a/awq/models/base.py
+++ b/awq/models/base.py
@@ -81,6 +81,7 @@
     "starcoder2": "AutoModelForCausalLM",
     "llava_next": "AutoModelForVision2Seq",
     "phi3": "AutoModelForCausalLM",
+    "phi3_v": "AutoModelForCausalLM",
     "cohere": "AutoModelForCausalLM",
     "deepseek_v2": "AutoModelForCausalLM",
     "minicpm": "AutoModelForCausalLM",
diff --git a/awq/models/phi3_v.py b/awq/models/phi3_v.py
new file mode 100644
index 00000000..5b1305c9
--- /dev/null
+++ b/awq/models/phi3_v.py
@@ -0,0 +1,72 @@
+import tqdm
+from typing import List, Tuple
+from .base import BaseAWQForCausalLM
+from awq.modules.fused.block import Phi3Block
+from awq.modules.fused.model import Phi3Model as AWQPhi3Model
+from transformers.models.phi3.modeling_phi3 import (
+    Phi3DecoderLayer as OldPhi3DecoderLayer
+)
+from awq.modules.fused.norm import FasterTransformerRMSNorm
+
+
+class Phi3VAWQForCausalLM(BaseAWQForCausalLM):
+    layer_type = "Phi3DecoderLayer"
+    max_seq_len_key = "max_position_embeddings"
+    modules_to_not_convert = ["vision_embed_tokens"]
+
+    @staticmethod
+    def get_model_layers(model):
+        return model.model.layers
+
+    @staticmethod
+    def get_act_for_scaling(module: OldPhi3DecoderLayer):
+        return dict(is_scalable=False)
+
+    @staticmethod
+    def move_embed(model, device: str):
+        model.model.embed_tokens = model.model.embed_tokens.to(device)
+
+    @staticmethod
+    def get_layers_for_scaling(module: OldPhi3DecoderLayer, input_feat, module_kwargs):
+        layers = []
+
+        # attention input
+        layers.append(
+            dict(
+                prev_op=module.input_layernorm,
+                layers=[module.self_attn.qkv_proj],
+                inp=input_feat["self_attn.qkv_proj"],
+                module2inspect=module.self_attn,
+                kwargs=module_kwargs,
+            )
+        )
+
+        # attention out
+        layers.append(
+            dict(
+                prev_op=module.self_attn.qkv_proj,
+                layers=[module.self_attn.o_proj],
+                inp=input_feat["self_attn.o_proj"],
+            )
+        )
+
+        # linear 1
+        layers.append(
+            dict(
+                prev_op=module.post_attention_layernorm,
+                layers=[module.mlp.gate_up_proj],
+                inp=input_feat["mlp.gate_up_proj"],
+                module2inspect=module.mlp,
+            )
+        )
+
+        # linear 2
+        layers.append(
+            dict(
+                prev_op=module.mlp.gate_up_proj,
+                layers=[module.mlp.down_proj],
+                inp=input_feat["mlp.down_proj"],
+            )
+        )
+
+        return layers
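
---

Usage note (a minimal sketch, not part of the patch): the snippet below shows
how the new handler would be exercised through AutoAWQ's standard quantization
flow. The checkpoint name microsoft/Phi-3-vision-128k-instruct and the output
directory are illustrative; trust_remote_code=True is assumed to be required
because the Phi-3-vision modeling code ships with the checkpoint rather than
with transformers.

    from awq import AutoAWQForCausalLM
    from transformers import AutoTokenizer

    model_path = "microsoft/Phi-3-vision-128k-instruct"  # illustrative checkpoint
    quant_path = "phi-3-vision-awq"                      # illustrative output dir

    # Load the FP16 model and tokenizer.
    model = AutoAWQForCausalLM.from_pretrained(model_path, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    # Standard 4-bit AWQ settings; the vision tower stays unquantized because
    # Phi3VAWQForCausalLM sets modules_to_not_convert = ["vision_embed_tokens"].
    quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

    model.quantize(tokenizer, quant_config=quant_config)
    model.save_quantized(quant_path)
    tokenizer.save_pretrained(quant_path)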