Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new _export method #507

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions optimum/intel/openvino/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ def __init__(self, model: openvino.runtime.Model, config: transformers.Pretraine
self.auto_model_class.register(AutoConfig, self.__class__)
self.device = torch.device("cpu")

def to(self, device: str):
def to(self, device: Union["torch.device", str]):
"""
Use the specified `device` for inference. For example: "cpu" or "gpu". `device` can
be in upper or lower case. To speed up first inference, call `.compile()` after `.to()`.
"""
self._device = device.upper()
self._device = str(device).upper()
self.request = None
return self

Expand Down
25 changes: 11 additions & 14 deletions optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,18 @@
from huggingface_hub import hf_hub_download
from openvino import Core, convert_model
from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation
from transformers import PretrainedConfig
from transformers import GenerationConfig, PretrainedConfig
from transformers.file_utils import add_start_docstrings
from transformers.generation import GenerationMixin

from optimum.exporters.onnx import OnnxConfig
from optimum.modeling_base import OptimizedModel

from ...exporters.openvino import export, main_export
from ..utils.import_utils import is_nncf_available, is_transformers_version
from ..utils.import_utils import is_nncf_available
from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, _print_compiled_model_properties


if is_transformers_version("<", "4.25.0"):
from transformers.generation_utils import GenerationMixin
else:
from transformers.generation import GenerationMixin

core = Core()

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -92,12 +88,7 @@ def __init__(
if enable_compilation:
self.compile()

if is_transformers_version("<=", "4.25.1"):
self.generation_config = None
else:
from transformers import GenerationConfig

self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None

@staticmethod
def load_model(file_name: Union[str, Path], load_in_8bit: bool = False):
Expand Down Expand Up @@ -247,7 +238,7 @@ def _cached_file(
return model_cache_path

@classmethod
def _from_transformers(
def _export(
cls,
model_id: str,
config: PretrainedConfig,
Expand Down Expand Up @@ -304,6 +295,12 @@ def _from_transformers(
config.save_pretrained(save_dir_path)
return cls._from_pretrained(model_id=save_dir_path, config=config, load_in_8bit=False, **kwargs)

@classmethod
def _from_transformers(cls, *args, **kwargs):
# TODO: enable the deprecation warning below once `from_pretrained_method` is set to `cls._export` (instead of `cls._from_transformers`) when `export=True`
# logger.warning("The method `_from_transformers` is deprecated, please use `_export` instead")
return cls._export(*args, **kwargs)

@classmethod
def _to_load(
cls,
Expand Down
11 changes: 2 additions & 9 deletions optimum/intel/openvino/modeling_base_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
import openvino
from huggingface_hub import hf_hub_download
from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation
from transformers import PretrainedConfig
from transformers import GenerationConfig, PretrainedConfig
from transformers.file_utils import add_start_docstrings

from ...exporters.openvino import main_export
from ..utils.import_utils import is_transformers_version
from .modeling_base import OVBaseModel
from .utils import (
ONNX_DECODER_NAME,
Expand Down Expand Up @@ -75,13 +74,7 @@ def __init__(
self.encoder_model = encoder
self.decoder_model = decoder
self.decoder_with_past_model = decoder_with_past

if is_transformers_version("<=", "4.25.1"):
self.generation_config = None
else:
from transformers import GenerationConfig

self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None

def _save_pretrained(self, save_directory: Union[str, Path]):
"""
Expand Down
8 changes: 1 addition & 7 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,18 @@
from openvino.runtime import Core, Tensor, Type
from transformers import AutoModelForCausalLM, PretrainedConfig
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import CausalLMOutputWithPast

from optimum.utils import NormalizedConfigManager

from ...exporters.openvino import ensure_stateful_is_available, main_export, patch_stateful
from ...exporters.openvino.stateful import model_has_state
from ..utils.import_utils import is_transformers_version
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE


if is_transformers_version("<", "4.25.0"):
from transformers.generation_utils import GenerationMixin
else:
from transformers.generation import GenerationMixin


logger = logging.getLogger(__name__)

core = Core()
Expand Down
4 changes: 2 additions & 2 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,8 @@ def _from_transformers(
**kwargs,
)

def to(self, device: str):
self._device = device.upper()
def to(self, device: Union["torch.device", str]):
self._device = str(device).upper()
self.clear_requests()
return self

Expand Down
18 changes: 4 additions & 14 deletions optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,15 @@
WhisperForConditionalGeneration,
)
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.generation import GenerationMixin
from transformers.generation.logits_process import WhisperTimeStampLogitsProcessor
from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
from transformers.models.whisper.tokenization_whisper import TASK_IDS, TO_LANGUAGE_CODE

from ..utils.import_utils import is_transformers_version
from .modeling_base_seq2seq import OVBaseModelForSeq2SeqLM
from .utils import _print_compiled_model_properties


if is_transformers_version("<", "4.25.0"):
from transformers.generation_utils import GenerationMixin
else:
from transformers.generation import GenerationMixin

if TYPE_CHECKING:
from transformers import PretrainedConfig

Expand Down Expand Up @@ -304,8 +299,8 @@ def __init__(
except AttributeError:
pass

def to(self, device: str):
self._device = device.upper()
def to(self, device: Union["torch.device", str]):
self._device = str(device).upper()
self.encoder._device = self._device
self.decoder._device = self._device
if self.use_cache:
Expand Down Expand Up @@ -755,12 +750,7 @@ class _OVModelForWhisper(OVModelForSpeechSeq2Seq):
auto_model_class = WhisperForConditionalGeneration

@classmethod
def _from_pretrained(
cls,
model_id: Union[str, Path],
config: "PretrainedConfig",
**kwargs,
):
def _from_pretrained(cls, model_id: Union[str, Path], config: "PretrainedConfig", **kwargs):
return super(OVModelForSpeechSeq2Seq, cls)._from_pretrained(model_id, config, **kwargs)

# Adapted from transformers.models.whisper.modeling_whisper
Expand Down
6 changes: 3 additions & 3 deletions tests/openvino/test_modeling_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
The tests in this file check that the basic functionality of optimum[openvino] works:
- Load the model with `from_transformers=True`
- Load the model with `export=True`
- Do inference with appropriate pipeline
- Save the model to disk

Expand Down Expand Up @@ -58,7 +58,7 @@ def test_pipeline(self, model_id):
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_class_str = MODEL_NAMES[model_id]
model_class = eval(model_class_str)
model = model_class.from_pretrained(model_id, from_transformers=True)
model = model_class.from_pretrained(model_id, export=True)
model.save_pretrained(f"{model_id}_ov")
model = model_class.from_pretrained(f"{model_id}_ov")

Expand All @@ -80,7 +80,7 @@ def test_openvino_methods(self):
"""
model_id = "hf-internal-testing/tiny-random-distilbert"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
model.reshape(1, 16)
model.half()
model.to("cpu")
Expand Down
Loading