diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py
index f6d3061a7a..c098e987fd 100644
--- a/optimum/intel/openvino/modeling.py
+++ b/optimum/intel/openvino/modeling.py
@@ -124,12 +124,12 @@ def __init__(self, model: openvino.runtime.Model, config: transformers.Pretraine
         self.auto_model_class.register(AutoConfig, self.__class__)
         self.device = torch.device("cpu")

-    def to(self, device: str):
+    def to(self, device: Union["torch.device", str]):
         """
         Use the specified `device` for inference. For example: "cpu" or "gpu". `device` can
         be in upper or lower case. To speed up first inference, call `.compile()` after `.to()`.
         """
-        self._device = device.upper()
+        self._device = str(device).upper()
         self.request = None
         return self

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 05dc3af9b5..7f64604520 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -22,22 +22,18 @@
 from huggingface_hub import hf_hub_download
 from openvino import Core, convert_model
 from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation
-from transformers import PretrainedConfig
+from transformers import GenerationConfig, PretrainedConfig
 from transformers.file_utils import add_start_docstrings
+from transformers.generation import GenerationMixin

 from optimum.exporters.onnx import OnnxConfig
 from optimum.modeling_base import OptimizedModel

 from ...exporters.openvino import export, main_export
-from ..utils.import_utils import is_nncf_available, is_transformers_version
+from ..utils.import_utils import is_nncf_available
 from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, _print_compiled_model_properties


-if is_transformers_version("<", "4.25.0"):
-    from transformers.generation_utils import GenerationMixin
-else:
-    from transformers.generation import GenerationMixin
-
 core = Core()

 logger = logging.getLogger(__name__)
@@ -92,12 +88,7 @@ def __init__(
         if enable_compilation:
             self.compile()

-        if is_transformers_version("<=", "4.25.1"):
-            self.generation_config = None
-        else:
-            from transformers import GenerationConfig
-
-            self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
+        self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None

     @staticmethod
     def load_model(file_name: Union[str, Path], load_in_8bit: bool = False):
@@ -247,7 +238,7 @@ def _cached_file(
         return model_cache_path

     @classmethod
-    def _from_transformers(
+    def _export(
         cls,
         model_id: str,
         config: PretrainedConfig,
@@ -304,6 +295,12 @@
         config.save_pretrained(save_dir_path)
         return cls._from_pretrained(model_id=save_dir_path, config=config, load_in_8bit=False, **kwargs)

+    @classmethod
+    def _from_transformers(cls, *args, **kwargs):
+        # TODO : add warning when from_pretrained_method is set to cls._export instead of cls._from_transformers when export=True
+        # logger.warning("The method `_from_transformers` is deprecated, please use `_export` instead")
+        return cls._export(*args, **kwargs)
+
     @classmethod
     def _to_load(
         cls,
diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
index 599491277c..4870e331ed 100644
--- a/optimum/intel/openvino/modeling_base_seq2seq.py
+++ b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -21,11 +21,10 @@
 import openvino
 from huggingface_hub import hf_hub_download
 from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation
-from transformers import PretrainedConfig
+from transformers import GenerationConfig, PretrainedConfig
 from transformers.file_utils import add_start_docstrings

 from ...exporters.openvino import main_export
-from ..utils.import_utils import is_transformers_version
 from .modeling_base import OVBaseModel
 from .utils import (
     ONNX_DECODER_NAME,
@@ -75,13 +74,7 @@
         self.encoder_model = encoder
         self.decoder_model = decoder
         self.decoder_with_past_model = decoder_with_past
-
-        if is_transformers_version("<=", "4.25.1"):
-            self.generation_config = None
-        else:
-            from transformers import GenerationConfig
-
-            self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
+        self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None

     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 8a2167eae4..584342e98d 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -25,24 +25,18 @@
 from openvino.runtime import Core, Tensor, Type
 from transformers import AutoModelForCausalLM, PretrainedConfig
 from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
+from transformers.generation import GenerationMixin
 from transformers.modeling_outputs import CausalLMOutputWithPast

 from optimum.utils import NormalizedConfigManager

 from ...exporters.openvino import ensure_stateful_is_available, main_export, patch_stateful
 from ...exporters.openvino.stateful import model_has_state
-from ..utils.import_utils import is_transformers_version
 from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
 from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
 from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE


-if is_transformers_version("<", "4.25.0"):
-    from transformers.generation_utils import GenerationMixin
-else:
-    from transformers.generation import GenerationMixin
-
-
 logger = logging.getLogger(__name__)

 core = Core()
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index fa48a5df68..8511cded52 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -329,8 +329,8 @@ def _from_transformers(
             **kwargs,
         )

-    def to(self, device: str):
-        self._device = device.upper()
+    def to(self, device: Union["torch.device", str]):
+        self._device = str(device).upper()
         self.clear_requests()
         return self

diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 9a7f913ab2..cca1c5381d 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -32,20 +32,15 @@
     WhisperForConditionalGeneration,
 )
 from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
+from transformers.generation import GenerationMixin
 from transformers.generation.logits_process import WhisperTimeStampLogitsProcessor
 from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
 from transformers.models.whisper.tokenization_whisper import TASK_IDS, TO_LANGUAGE_CODE

-from ..utils.import_utils import is_transformers_version
 from .modeling_base_seq2seq import OVBaseModelForSeq2SeqLM
 from .utils import _print_compiled_model_properties


-if is_transformers_version("<", "4.25.0"):
-    from transformers.generation_utils import GenerationMixin
-else:
-    from transformers.generation import GenerationMixin
-

 if TYPE_CHECKING:
     from transformers import PretrainedConfig
@@ -304,8 +299,8 @@ def __init__(
         except AttributeError:
             pass

-    def to(self, device: str):
-        self._device = device.upper()
+    def to(self, device: Union["torch.device", str]):
+        self._device = str(device).upper()
         self.encoder._device = self._device
         self.decoder._device = self._device
         if self.use_cache:
@@ -755,12 +750,7 @@ class _OVModelForWhisper(OVModelForSpeechSeq2Seq):
     auto_model_class = WhisperForConditionalGeneration

     @classmethod
-    def _from_pretrained(
-        cls,
-        model_id: Union[str, Path],
-        config: "PretrainedConfig",
-        **kwargs,
-    ):
+    def _from_pretrained(cls, model_id: Union[str, Path], config: "PretrainedConfig", **kwargs):
         return super(OVModelForSpeechSeq2Seq, cls)._from_pretrained(model_id, config, **kwargs)

     # Adapted from transformers.models.whisper.modeling_whisper
diff --git a/tests/openvino/test_modeling_basic.py b/tests/openvino/test_modeling_basic.py
index a443c5fea7..03a0d26c7d 100644
--- a/tests/openvino/test_modeling_basic.py
+++ b/tests/openvino/test_modeling_basic.py
@@ -1,6 +1,6 @@
 """
 The goal of the test in this file is to test that basic functionality of optimum[openvino] works:
-- Load the model with `from_transformers=True`
+- Load the model with `export=True`
 - Do inference with appropriate pipeline
 - Save the model to disk

@@ -58,7 +58,7 @@ def test_pipeline(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model_class_str = MODEL_NAMES[model_id]
         model_class = eval(model_class_str)
-        model = model_class.from_pretrained(model_id, from_transformers=True)
+        model = model_class.from_pretrained(model_id, export=True)
         model.save_pretrained(f"{model_id}_ov")
         model = model_class.from_pretrained(f"{model_id}_ov")

@@ -80,7 +80,7 @@ def test_openvino_methods(self):
         """
         model_id = "hf-internal-testing/tiny-random-distilbert"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = OVModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
+        model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
         model.reshape(1, 16)
         model.half()
         model.to("cpu")
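
Note for reviewers: a minimal sketch of the user-visible surface after this patch. It is illustrative only, not part of the diff; it assumes optimum-intel with the OpenVINO extra is installed and reuses the tiny test checkpoint from tests/openvino/test_modeling_basic.py.

    # Illustrative usage after this change (not part of the patch).
    import torch
    from transformers import AutoTokenizer

    from optimum.intel import OVModelForSequenceClassification

    model_id = "hf-internal-testing/tiny-random-distilbert"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # The tests now use `export=True` instead of `from_transformers=True`,
    # and `_from_transformers` simply forwards to the renamed `_export`.
    model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)

    # `to()` now accepts a torch.device as well as a string; both are
    # normalized to an upper-cased OpenVINO device name ("CPU" here)
    # via str(device).upper().
    model.to(torch.device("cpu"))
    model.compile()

    outputs = model(**tokenizer("hello world", return_tensors="pt"))
    print(outputs.logits.shape)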