Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/openvino/models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ Here is the list of the supported architectures :
- Qwen2VL
- Qwen2.5VL
- Qwen3VL
- Qwen3.5
- Qwen3-Next
- ResNet
- Roberta
Expand Down
10 changes: 10 additions & 0 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,16 @@ def ts_patched_forward(*args, **kwargs):
extension=conversion_extensions,
)

if patch_16bit_model:
# Undo __make_16bit_traceable patching on sub-modules to avoid corrupting
# forward methods of modules shared across export behaviors (e.g. pos_embed
# Embedding in VLMs that is also exported separately as vision_embeddings_pos).
_orig_forward_attr = "_openvino_module_extension_patch_orig_forward"
for module in model.modules():
if hasattr(module, _orig_forward_attr):
module.forward = getattr(module, _orig_forward_attr)
delattr(module, _orig_forward_attr)

ov_model.validate_nodes_and_infer_types() # TODO: remove as unnecessary validation?

output_names = list(config.outputs.keys())
Expand Down
314 changes: 314 additions & 0 deletions optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@
Qwen2MoEPatcher,
Qwen2VLLanguageModelPatcher,
Qwen2VLVisionEmbMergerPatcher,
Qwen3_5ModelPatcher,
Qwen3_5MoeModelPatcher,
Qwen3_5VisionEmbMergerPatcher,
Qwen3MoeModelPatcher,
Qwen3NextModelPatcher,
Qwen3VLLanguageModelPatcher,
Expand Down Expand Up @@ -253,6 +256,14 @@ def init_model_configs():
"transformers",
"AutoModelForCausalLM",
)
TasksManager._CUSTOM_CLASSES[("pt", "qwen3_5", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)
TasksManager._CUSTOM_CLASSES[("pt", "qwen3_5_moe", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)

# since transformers v4.46, model can be loaded using default AutoModelForImageTextToText
# https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/models/auto/modeling_auto.py#L776
Expand Down Expand Up @@ -5497,3 +5508,306 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
)

return dummy_inputs


class Qwen3_5DummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
    """
    Generates dummy cache_params inputs for Qwen3.5 architectures.

    Qwen3.5 interleaves full-attention layers (regular key/value cache) with
    linear-attention layers (a conv state plus a recurrent/SSM state), so the
    dummy cache is a flat list: two tensors per linear-attention layer,
    followed by a key and a value tensor per full-attention layer.
    """

    SUPPORTED_INPUT_NAMES = ("cache_params",)

    def __init__(
        self,
        task: str,
        normalized_config,
        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
        **kwargs,
    ):
        super().__init__(
            task=task,
            normalized_config=normalized_config,
            batch_size=batch_size,
            sequence_length=sequence_length,
            **kwargs,
        )

        config = normalized_config.config
        # Layer layout drives how many cache tensors are emitted per flavor.
        self.num_full_attn_layers = config.layer_types.count("full_attention")
        self.num_linear_attn_layers = config.layer_types.count("linear_attention")
        self.conv_kernel_size = config.linear_conv_kernel_dim
        # Fall back to hidden_size / num_attention_heads when head_dim is absent.
        self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
        self.head_k_dim = config.linear_key_head_dim
        self.head_v_dim = config.linear_value_head_dim
        self.num_v_heads = config.linear_num_value_heads
        self.num_k_heads = config.linear_num_key_heads
        self.num_key_value_heads = config.num_key_value_heads

    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
        """Return the flat list of dummy cache tensors described in the class docstring."""
        cache_params = []

        # All linear-attention layers share the same state shapes, so the
        # shape computations are hoisted out of the loop.
        d_inner = self.num_k_heads * (2 * self.head_k_dim + self.head_v_dim * self.num_v_heads // self.num_k_heads)
        conv_state_shape = (self.batch_size, d_inner, self.conv_kernel_size)
        recurrent_state_shape = (self.batch_size, self.num_v_heads, self.head_k_dim, self.head_v_dim)
        for _ in range(self.num_linear_attn_layers):
            cache_params.append(self.random_float_tensor(conv_state_shape, framework=framework, dtype=float_dtype))
            cache_params.append(
                self.random_float_tensor(recurrent_state_shape, framework=framework, dtype=float_dtype)
            )

        kv_shape = (self.batch_size, self.num_key_value_heads, self.sequence_length, self.head_dim)
        for _ in range(self.num_full_attn_layers):
            cache_params.append(self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype))
            cache_params.append(self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype))

        return cache_params


@register_in_tasks_manager(
    "qwen3_5_text",
    *["text-generation", "text-generation-with-past"],
    library_name="transformers",
)
class Qwen3_5TextOpenVINOConfig(Qwen3VLTextOpenVINOConfig):
    """OpenVINO export configuration for the Qwen3.5 text (decoder) model.

    The cache is exposed as a flat ``cache_params`` collection that mixes
    conv/ssm states (linear-attention layers) with key/value states
    (full-attention layers).
    """

    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, Qwen3_5DummyPastKeyValuesGenerator)
    DUMMY_PKV_GENERATOR_CLASS = Qwen3_5DummyPastKeyValuesGenerator
    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
    MIN_TRANSFORMERS_VERSION = "4.57.0"
    _MODEL_PATCHER = Qwen3_5ModelPatcher

    def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str):
        """Register the cache tensor names/axes for the given direction ("inputs" or "outputs")."""
        if direction not in ["inputs", "outputs"]:
            raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given')

        is_input_side = direction == "inputs"
        decoder_sequence_name = (
            "past_sequence_length" if is_input_side else "past_sequence_length + sequence_length"
        )
        cache_name_prefix = "cache_params.past" if is_input_side else "cache_params.present"

        # NOTE: stored on the instance (matching prior behavior) rather than as locals.
        self.num_full_attn_layers = self._normalized_config.layer_types.count("full_attention")
        self.num_linear_attn_layers = self._normalized_config.layer_types.count("linear_attention")

        for layer_idx in range(self.num_linear_attn_layers):
            inputs_or_outputs[f"{cache_name_prefix}.conv.{layer_idx}"] = {0: "batch_size"}
            inputs_or_outputs[f"{cache_name_prefix}.ssm.{layer_idx}"] = {0: "batch_size"}

        for layer_idx in range(self.num_full_attn_layers):
            inputs_or_outputs[f"{cache_name_prefix}.key.{layer_idx}"] = {0: "batch_size", 2: decoder_sequence_name}
            inputs_or_outputs[f"{cache_name_prefix}.value.{layer_idx}"] = {0: "batch_size", 2: decoder_sequence_name}

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        model_inputs: Dict[str, Dict[int, str]] = {
            "input_ids": {0: "batch_size", 1: "sequence_length"},
            "attention_mask": {0: "batch_size", 1: "sequence_length"},
        }
        if self.use_past_in_inputs:
            self.add_past_key_values(model_inputs, direction="inputs")
        return model_inputs

    def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
        """Build dummy inputs, collapsing all cache tensors into a single ``cache_params`` entry."""
        generators = self._create_dummy_input_generator_classes(**kwargs)

        input_names = [name for name in self.inputs if not name.startswith("cache_params")]
        if self.use_past_in_inputs:
            input_names.append("cache_params")

        dummy_inputs = {}
        for input_name in input_names:
            generator = next((gen for gen in generators if gen.supports_input(input_name)), None)
            if generator is None:
                raise RuntimeError(
                    f'Could not generate dummy input for "{input_name}". Try adding a proper dummy input generator to the model ONNX config.'
                )
            dummy_inputs[input_name] = self.overwrite_shape_and_generate_input(
                generator,
                input_name,
                framework,
                input_shapes=kwargs,
            )

        return dummy_inputs


@register_in_tasks_manager(
    "qwen3_5",
    "image-text-to-text",
    library_name="transformers",
)
class Qwen3_5OpenVINOConfig(Qwen3VLOpenVINOConfig):
    """OpenVINO export configuration for the Qwen3.5 vision-language model.

    The model is exported as several sub-models selected by ``behavior``:
    text embeddings, the language model, vision embeddings, the vision
    embedding merger, and the vision positional embeddings
    (``VISION_EMBEDDINGS_POS``), which are exported separately.
    """

    SUPPORTED_BEHAVIORS = [model_type.value for model_type in QwenVLConfigBehavior]
    DUMMY_INPUT_GENERATOR_CLASSES = (DummyQwen3VLVisionEmbedInputGenerator,)
    MIN_TRANSFORMERS_VERSION = "4.57.0"

    def __init__(
        self,
        config: "PretrainedConfig",
        task: str = "feature-extraction",
        int_dtype: str = "int64",
        float_dtype: str = "fp32",
        behavior: QwenVLConfigBehavior = QwenVLConfigBehavior.VISION_EMBEDDINGS,
        preprocessors: Optional[List[Any]] = None,
    ):
        super().__init__(
            config=config,
            task=task,
            int_dtype=int_dtype,
            float_dtype=float_dtype,
            preprocessors=preprocessors,
            behavior=behavior,
        )
        # The positional-embedding sub-model is described by the vision config;
        # switch the normalized config to it and expose the embedding dim.
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS_POS and hasattr(config, "vision_config"):
            self._config = config.vision_config
            self._normalized_config = self.NORMALIZED_CONFIG_CLASS(self._config)
            self._normalized_config.use_embed_dim = True

    def with_behavior(
        self,
        behavior: Union[str, QwenVLConfigBehavior],
    ):
        """
        Creates a config for different behaviour.

        Args:
            behavior ([`ConfigBehavior`]):
                The behavior to use for the new instance.

        Raises:
            ValueError: If `behavior` is not a supported behavior.
        """
        if isinstance(behavior, str) and not isinstance(behavior, QwenVLConfigBehavior):
            behavior = QwenVLConfigBehavior(behavior)

        if behavior == QwenVLConfigBehavior.TEXT_EMBEDDINGS:
            return get_vlm_text_embeddings_config(
                "qwen3_5_text", self._orig_config.text_config, self.int_dtype, self.float_dtype
            )

        if behavior == QwenVLConfigBehavior.LANGUAGE:
            return get_vlm_text_generation_config(
                "qwen3_5_text",
                self._orig_config.text_config,
                self.int_dtype,
                self.float_dtype,
                model_patcher=Qwen3_5ModelPatcher,
            )

        if behavior in (
            QwenVLConfigBehavior.VISION_EMBEDDINGS,
            QwenVLConfigBehavior.VISION_EMBEDDINGS_MERGER,
            QwenVLConfigBehavior.VISION_EMBEDDINGS_POS,
        ):
            return self.__class__(
                self._orig_config,
                task=self.task,
                int_dtype=self.int_dtype,
                float_dtype=self.float_dtype,
                behavior=behavior,
                preprocessors=self._preprocessors,
            )

        # Previously this fell through and silently returned None; fail loudly instead.
        raise ValueError(f"Unsupported behavior: {behavior}.")

    def patch_model_for_export(self, model: Union["PreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None):
        """Return the patcher matching the current export behavior."""
        model_kwargs = model_kwargs or {}
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS_MERGER:
            return Qwen3_5VisionEmbMergerPatcher(self, model, model_kwargs)
        if self._behavior in (QwenVLConfigBehavior.VISION_EMBEDDINGS, QwenVLConfigBehavior.VISION_EMBEDDINGS_POS):
            return ModelPatcher(self, model, model_kwargs=model_kwargs)
        return super().patch_model_for_export(model, model_kwargs)

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS_POS:
            # The positional-embedding sub-model takes a single tensor with a dynamic sequence axis.
            return {
                "input": {1: "sequence_length"},
            }
        return super().inputs

    @property
    def outputs(self) -> Dict[str, Dict[int, str]]:
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS:
            return super().outputs
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS_MERGER:
            return {"last_hidden_state": {0: "seq_len"}}
        if self._behavior == QwenVLConfigBehavior.VISION_EMBEDDINGS_POS:
            return {"last_hidden_state": {0: "seq_len", 1: "seq_len"}}
        if self._behavior == QwenVLConfigBehavior.TEXT_EMBEDDINGS:
            return {"inputs_embeds": {0: "batch_size", 1: "sequence_length"}}
        if self._behavior == QwenVLConfigBehavior.LANGUAGE:
            return get_vlm_internal_text_generation_config(
                "qwen3_5_text", self._orig_config.text_config, self.int_dtype, self.float_dtype
            ).outputs
        # Use a specific exception type instead of bare Exception (still caught by `except Exception`).
        raise ValueError("Unknown Qwen3.5 behavior type.")


@register_in_tasks_manager(
    "qwen3_5_moe_text",
    "text-generation",
    "text-generation-with-past",
    library_name="transformers",
)
class Qwen3_5MoeTextOpenVINOConfig(Qwen3_5TextOpenVINOConfig):
    """OpenVINO export config for the Qwen3.5-MoE text model; only the patcher differs from the dense variant."""

    _MODEL_PATCHER = Qwen3_5MoeModelPatcher


@register_in_tasks_manager(
    "qwen3_5_moe",
    "image-text-to-text",
    library_name="transformers",
)
class Qwen3_5MoeOpenVINOConfig(Qwen3_5OpenVINOConfig):
    """OpenVINO export configuration for the Qwen3.5-MoE vision-language model.

    Mirrors the dense Qwen3.5 config but registers the MoE text model type
    and the MoE-specific model patcher for the language sub-model.
    """

    def with_behavior(
        self,
        behavior: Union[str, QwenVLConfigBehavior],
    ):
        """Create a config instance tailored to the requested export behavior."""
        if isinstance(behavior, str) and not isinstance(behavior, QwenVLConfigBehavior):
            behavior = QwenVLConfigBehavior(behavior)

        if behavior == QwenVLConfigBehavior.TEXT_EMBEDDINGS:
            return get_vlm_text_embeddings_config(
                "qwen3_5_moe_text", self._orig_config.text_config, self.int_dtype, self.float_dtype
            )

        if behavior == QwenVLConfigBehavior.LANGUAGE:
            return get_vlm_text_generation_config(
                "qwen3_5_moe_text",
                self._orig_config.text_config,
                self.int_dtype,
                self.float_dtype,
                model_patcher=Qwen3_5MoeModelPatcher,
            )

        vision_behaviors = (
            QwenVLConfigBehavior.VISION_EMBEDDINGS,
            QwenVLConfigBehavior.VISION_EMBEDDINGS_MERGER,
            QwenVLConfigBehavior.VISION_EMBEDDINGS_POS,
        )
        if behavior in vision_behaviors:
            return self.__class__(
                self._orig_config,
                task=self.task,
                int_dtype=self.int_dtype,
                float_dtype=self.float_dtype,
                behavior=behavior,
                preprocessors=self._preprocessors,
            )

    @property
    def outputs(self) -> Dict[str, Dict[int, str]]:
        if self._behavior != QwenVLConfigBehavior.LANGUAGE:
            return super().outputs
        return get_vlm_internal_text_generation_config(
            "qwen3_5_moe_text", self._orig_config.text_config, self.int_dtype, self.float_dtype
        ).outputs
Loading