diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 6cdc5a66a0..f2926f63bd 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -227,13 +227,19 @@ def _get_default_int4_config(model_id_or_path, library_name):
             return _DEFAULT_4BIT_CONFIG
 
-        library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
-        if library_name == "sentence_transformers" and self.args.library is None:
-            logger.warning(
-                "Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
-                "`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
+        if self.args.library is None:
+            # TODO: add revision, subfolder and token to args
+            library_name = TasksManager._infer_library_from_model_name_or_path(
+                model_name_or_path=self.args.model, cache_dir=self.args.cache_dir
             )
-            library_name = "transformers"
+            if library_name == "sentence_transformers":
+                logger.warning(
+                    "Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
+                    "`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
+                )
+                library_name = "transformers"
+        else:
+            library_name = self.args.library
 
         if self.args.weight_format is None:
             ov_config = None
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 757244df55..f2afd6535f 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -47,11 +47,24 @@
 logger = logging.getLogger(__name__)
 
 
-def infer_task(task, model_name_or_path):
+def infer_task(
+    task,
+    model_name_or_path,
+    subfolder: str = "",
+    revision: Optional[str] = None,
+    cache_dir: str = HUGGINGFACE_HUB_CACHE,
+    token: Optional[Union[bool, str]] = None,
+):
     task = TasksManager.map_from_synonym(task)
     if task == "auto":
         try:
-            task = TasksManager.infer_task_from_model(model_name_or_path)
+            task = TasksManager._infer_task_from_model_name_or_path(
+                model_name_or_path=model_name_or_path,
+                subfolder=subfolder,
+                revision=revision,
+                cache_dir=cache_dir,
+                token=token,
+            )
         except KeyError as e:
             raise KeyError(
                 f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
@@ -193,19 +206,27 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)
 
     original_task = task
-    task = infer_task(task, model_name_or_path)
-    framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
-    library_name_is_not_provided = library_name is None
-    library_name = TasksManager.infer_library_from_model(
-        model_name_or_path, subfolder=subfolder, library_name=library_name
+    task = infer_task(
+        task, model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
+    )
+    framework = TasksManager.determine_framework(
+        model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
     )
 
-    if library_name == "sentence_transformers" and library_name_is_not_provided:
-        logger.warning(
-            "Library name is not specified. There are multiple possible variants: `sentence_tenasformers`, `transformers`."
-            "`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
+    if library_name is None:
+        library_name = TasksManager._infer_library_from_model_name_or_path(
+            model_name_or_path=model_name_or_path,
+            subfolder=subfolder,
+            revision=revision,
+            cache_dir=cache_dir,
+            token=token,
        )
-        library_name = "transformers"
+        if library_name == "sentence_transformers":
+            logger.warning(
+                "Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
+                "`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
+            )
+            library_name = "transformers"
 
     do_gptq_patching = False
     custom_architecture = False
@@ -317,9 +338,7 @@ class StoreAttr(object):
         )
         model.config.pad_token_id = pad_token_id
 
-    if "stable-diffusion" in task:
-        model_type = "stable-diffusion"
-    elif hasattr(model.config, "export_model_type"):
+    if hasattr(model.config, "export_model_type"):
         model_type = model.config.export_model_type.replace("_", "-")
     else:
         model_type = model.config.model_type.replace("_", "-")
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 3b214f77e4..83c031435e 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -64,7 +64,7 @@ from transformers.modeling_utils import PreTrainedModel
 
 
 if is_diffusers_available():
-    from diffusers import ModelMixin
+    from diffusers import DiffusionPipeline, ModelMixin
 
 if is_tf_available():
     from transformers.modeling_tf_utils import TFPreTrainedModel
@@ -74,7 +74,7 @@
 from optimum.intel.openvino.configuration import OVConfig
 
 
-def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None):
+def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None, library_name: Optional[str] = None):
     compress_to_fp16 = False
 
     if ov_config is not None:
@@ -90,13 +90,12 @@
 
         compress_to_fp16 = ov_config.dtype == "fp16"
 
-    library_name = TasksManager.infer_library_from_model(Path(path).parent)
     model = _add_version_info_to_model(model, library_name)
     save_model(model, path, compress_to_fp16)
 
 
 def export(
-    model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
+    model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
     config: OnnxConfig,
     output: Path,
     opset: Optional[int] = None,
@@ -139,7 +138,7 @@
     )
 
     if "diffusers" in str(model.__class__) and not is_diffusers_available():
-        raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.")
+        raise ImportError("The package `diffusers` is required to export diffusion models to OpenVINO.")
 
     if stateful:
         # This will be checked anyway after the model conversion, but checking it earlier will save time for a user if not suitable version is used
@@ -198,7 +197,19 @@ def export_tensorflow(
     onnx_path = Path(output).with_suffix(".onnx")
     input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path)
     ov_model = convert_model(str(onnx_path))
-    _save_model(ov_model, output.parent / output, ov_config=ov_config)
+
+    if model.__class__.__module__.startswith("optimum"):
+        # for wrapped models
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
+    else:
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model)
+
+    _save_model(
+        ov_model,
+        output.parent / output,
+        ov_config=ov_config,
+        library_name=library_name,
+    )
     return input_names, output_names, True
 
 
@@ -251,7 +262,19 @@
     )
     torch.onnx.export = orig_torch_onnx_export
     ov_model = convert_model(str(onnx_output))
-    _save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, ov_config=ov_config)
+
+    if model.__class__.__module__.startswith("optimum"):
+        # for wrapped models
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
+    else:
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model)
+
+    _save_model(
+        ov_model,
+        output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
+        ov_config=ov_config,
+        library_name=library_name,
+    )
     return input_names, output_names, True
 
 
@@ -413,7 +436,18 @@ def ts_patched_forward(*args, **kwargs):
     if stateful:
         patch_stateful(model.config, ov_model)
 
-    _save_model(ov_model, output, ov_config=ov_config)
+    if model.__module__.startswith("optimum"):
+        # for wrapped models like timm in optimum.intel.openvino.modeling_timm
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
+    else:
+        library_name = TasksManager._infer_library_from_model_or_model_class(model=model)
+
+    _save_model(
+        ov_model,
+        output,
+        ov_config=ov_config,
+        library_name=library_name,
+    )
     clear_class_registry()
     del model
     gc.collect()
@@ -422,7 +456,7 @@ def ts_patched_forward(*args, **kwargs):
 
 def export_models(
     models_and_export_configs: Dict[
-        str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"]
+        str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"]
     ],
     output_dir: Path,
     opset: Optional[int] = None,
@@ -491,7 +525,7 @@ def export_models(
 
 
 def export_from_model(
-    model: Union["PreTrainedModel", "TFPreTrainedModel"],
+    model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
     output: Union[str, Path],
     task: Optional[str] = None,
     ov_config: Optional["OVConfig"] = None,
@@ -505,14 +539,15 @@ def export_from_model(
     trust_remote_code: bool = False,
     **kwargs_shapes,
 ):
+    model_kwargs = model_kwargs or {}
+
     if ov_config is not None and ov_config.quantization_config and not is_nncf_available():
         raise ImportError(
             f"Compression of the weights to {ov_config.quantization_config} requires nncf, please install it with `pip install nncf`"
         )
 
-    model_kwargs = model_kwargs or {}
-    library_name = TasksManager._infer_library_from_model(model)
-    TasksManager.standardize_model_attributes(model, library_name)
+    library_name = TasksManager._infer_library_from_model_or_model_class(model=model)
+    TasksManager.standardize_model_attributes(model)
 
     if hasattr(model.config, "export_model_type"):
         model_type = model.config.export_model_type.replace("_", "-")
@@ -521,7 +556,7 @@ def export_from_model(
 
     custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE
 
-    if task is not None:
+    if task is not None and task != "auto":
         task = TasksManager.map_from_synonym(task)
     else:
         try:
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index 69a750fb65..e256e19520 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -18,11 +18,12 @@
 import os
 from glob import glob
 from pathlib import Path
-from typing import Tuple, Union
+from typing import Tuple, Type, Union
 
 import numpy as np
 from huggingface_hub import model_info
-from openvino.runtime import Core, Type, properties
+from openvino.runtime import Core, properties
+from openvino.runtime import Type as OVType
 from transformers import AutoTokenizer, CLIPTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 from transformers.onnx.utils import ParameterFormat, compute_serialized_parameters_size
 
@@ -70,19 +71,19 @@
 
 
 STR_TO_OV_TYPE = {
-    "boolean": Type.boolean,
-    "f16": Type.f16,
-    "f32": Type.f32,
-    "f64": Type.f64,
-    "i8": Type.i8,
-    "i16": Type.i16,
-    "i32": Type.i32,
-    "i64": Type.i64,
-    "u8": Type.u8,
-    "u16": Type.u16,
-    "u32": Type.u32,
-    "u64": Type.u64,
-    "bf16": Type.bf16,
+    "boolean": OVType.boolean,
+    "f16": OVType.f16,
+    "f32": OVType.f32,
+    "f64": OVType.f64,
+    "i8": OVType.i8,
+    "i16": OVType.i16,
+    "i32": OVType.i32,
+    "i64": OVType.i64,
+    "u8": OVType.u8,
+    "u16": OVType.u16,
+    "u32": OVType.u32,
+    "u64": OVType.u64,
+    "bf16": OVType.bf16,
 }
 
 
@@ -110,7 +111,7 @@
 }
 
 
-NEED_CONVERT_TO_FAST_TOKENIZER: Tuple[type(PreTrainedTokenizer)] = (CLIPTokenizer,)
+NEED_CONVERT_TO_FAST_TOKENIZER: Tuple[Type[PreTrainedTokenizer]] = (CLIPTokenizer,)
 
 
 def maybe_convert_tokenizer_to_fast(
diff --git a/setup.py b/setup.py
index 110df03015..6665389309 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.43.0",
-    "optimum>=1.21.2,<1.22.0",
+    "optimum@git+https://github.com/huggingface/optimum.git",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",
diff --git a/tests/openvino/test_stable_diffusion.py b/tests/openvino/test_diffusion.py
similarity index 100%
rename from tests/openvino/test_stable_diffusion.py
rename to tests/openvino/test_diffusion.py
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 07e2672e68..6a8005ad67 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -61,9 +61,9 @@ class OVCLIExportTestCase(unittest.TestCase):
         ("audio-classification", "wav2vec2"),
         ("fill-mask", "bert"),
         ("feature-extraction", "blenderbot"),
-        ("stable-diffusion", "stable-diffusion"),
-        ("stable-diffusion-xl", "stable-diffusion-xl"),
-        ("stable-diffusion-xl", "stable-diffusion-xl-refiner"),
+        ("text-to-image", "stable-diffusion"),
+        ("text-to-image", "stable-diffusion-xl"),
+        ("image-to-image", "stable-diffusion-xl-refiner"),
     )
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
         "gpt2": 2,
@@ -139,7 +139,11 @@ def test_exporters_cli(self, task: str, model_type: str):
                 check=True,
             )
             model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
-            eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
+            eval(
+                _HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]
+                if task.replace("-with-past", "") in _HEAD_TO_AUTOMODELS
+                else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
+            ).from_pretrained(tmpdir, **model_kwargs)
 
     @parameterized.expand(
         arch
@@ -176,7 +180,11 @@ def test_exporters_cli_fp16(self, task: str, model_type: str):
                 check=True,
             )
             model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
-            eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
+            eval(
+                _HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]
+                if task.replace("-with-past", "") in _HEAD_TO_AUTOMODELS
+                else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
+            ).from_pretrained(tmpdir, **model_kwargs)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_exporters_cli_int8(self, task: str, model_type: str):
@@ -187,15 +195,19 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
                 check=True,
             )
             model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
-            model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
+            model = eval(
+                _HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]
+                if task.replace("-with-past", "") in _HEAD_TO_AUTOMODELS
+                else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
+            ).from_pretrained(tmpdir, **model_kwargs)
 
             if task.startswith("text2text-generation"):
                 models = [model.encoder, model.decoder]
                 if task.endswith("with-past"):
                     models.append(model.decoder_with_past)
-            elif task.startswith("stable-diffusion"):
+            elif model_type.startswith("stable-diffusion"):
                 models = [model.unet, model.vae_encoder, model.vae_decoder]
-                models.append(model.text_encoder if task == "stable-diffusion" else model.text_encoder_2)
+                models.append(model.text_encoder if model_type == "stable-diffusion" else model.text_encoder_2)
             else:
                 models = [model]
 
@@ -212,7 +224,7 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in
                 shell=True,
                 check=True,
             )
-            model = eval(_HEAD_TO_AUTOMODELS[model_type]).from_pretrained(tmpdir)
+            model = eval(_HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]).from_pretrained(tmpdir)
             num_fq, num_int8, _ = get_num_quantized_nodes(model.unet)
             self.assertEqual(exp_num_int8, num_int8)
             self.assertEqual(exp_num_fq, num_fq)
@@ -227,7 +239,11 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
                 capture_output=True,
             )
             model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
-            model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
+            model = eval(
+                _HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]
+                if task.replace("-with-past", "") in _HEAD_TO_AUTOMODELS
+                else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
+            ).from_pretrained(tmpdir, **model_kwargs)
 
             _, num_int8, num_int4 = get_num_quantized_nodes(model)
             self.assertEqual(expected_int8, num_int8)
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index e36ea47df2..c5af9c2fb9 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -525,7 +525,10 @@ def test_ovmodel_load_large_model_with_default_compressed_weights(self):
                         MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
                     )
                 save_model_patch.assert_called_with(
-                    unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(quantization_config={"bits": 8})
+                    unittest.mock.ANY,
+                    unittest.mock.ANY,
+                    ov_config=OVConfig(quantization_config={"bits": 8}),
+                    library_name="transformers",
                 )
 
     def test_ovmodel_load_large_model_with_uncompressed_weights(self):
@@ -540,7 +543,10 @@ def test_ovmodel_load_large_model_with_uncompressed_weights(self):
                         MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
                    )
                 save_model_patch.assert_called_with(
-                    unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                    unittest.mock.ANY,
+                    unittest.mock.ANY,
+                    ov_config=OVConfig(dtype="fp32"),
+                    library_name="transformers",
                 )
 
     def test_ovmodel_load_large_model_with_additional_quantization_config(self):
@@ -560,7 +566,10 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
                    )  # quantization will be performed later, using load_model
                 save_model_patch.assert_called_with(
-                    unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                    unittest.mock.ANY,
+                    unittest.mock.ANY,
+                    ov_config=OVConfig(dtype="fp32"),
+                    library_name="transformers",
                 )
 
                 compression_params = {
                     "mode": nncf.CompressWeightsMode.INT4_SYM,