Skip to content

Commit

Permalink
Merge remote-tracking branch 'sl/stateful' into ea/stateful
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova committed Jan 8, 2024
2 parents e2194b3 + 9992419 commit 70d086a
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 15 deletions.
3 changes: 2 additions & 1 deletion optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ def main_export(
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, `int4_asym_g64` - as previous but asymmetric w/ zero-point, `f32` - means no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`Optional[bool]`) - Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
stateful (`Optional[bool]`, defaults to `True`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
**kwargs_shapes (`Dict`):
Shapes to use during inference. This argument allows overriding the default shapes used during the ONNX export.
Expand Down
18 changes: 8 additions & 10 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
from optimum.exporters.onnx.model_patcher import DecoderModelPatcher
from optimum.utils import is_diffusers_available
from .stateful import patch_stateful, raise_if_openvino_is_too_old
from .better_transformer_patch import patch_model_with_bettertransformer

from ...intel.utils.import_utils import is_nncf_available, is_optimum_version
from .better_transformer_patch import patch_model_with_bettertransformer
Expand Down Expand Up @@ -130,7 +128,7 @@ def export(
Compression ratio between primary and backup precision (only relevant to INT4).
input_shapes (`Optional[Dict]`, defaults to `None`):
If specified, allows using specific shapes for the example input provided to the exporter.
stateful (`Optional[bool]`):
stateful (`Optional[bool]`, defaults to `False`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
Returns:
Expand Down Expand Up @@ -242,8 +240,6 @@ def export_pytorch_via_onnx(
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, `int4_asym_g64` - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`Optional[bool]`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
Returns:
`Tuple[List[str], List[str], bool]`: A tuple with an ordered list of the model's inputs, and the named inputs from
Expand Down Expand Up @@ -307,7 +303,7 @@ def export_pytorch(
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, `int4_asym_g64` - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`Optional[bool]`):
stateful (`Optional[bool]`, defaults to `False`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
Returns:
Expand Down Expand Up @@ -408,9 +404,11 @@ def ts_patched_forward(*args, **kwargs):
if patch_model_forward:
model.forward = orig_forward
if stateful:
raise ValueError(
"Making stateful models is not supported when exporting to ONNX as an intermediate step. "
"Set stateful=False, or provide a model that can be converted to OpenVINO without fallback to ONNX conversion path."
# cannot raise because stateful is enabled by default and it would break backward compatibility for models that couldn't convert to OV directly
# TODO: Implement stateful for ONNX path as well, not doing it right now because of lack of validation
logger.warn(
"[ WARNING ] Making stateful models is not supported when exporting to ONNX as an intermediate step. Stateless model will be exported instead. "
"Provide a model that can be converted to OpenVINO without fallback to ONNX conversion path."
)
return export_pytorch_via_onnx(
model,
Expand Down Expand Up @@ -488,7 +486,7 @@ def export_models(
Compression ratio between primary and backup precision (only relevant to INT4).
model_kwargs (Optional[Dict[str, Any]], optional):
Additional kwargs for model export.
stateful (`Optional[bool]`)
stateful (`Optional[bool]`, defaults to `False`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
Raises:
Expand Down
2 changes: 0 additions & 2 deletions optimum/exporters/openvino/stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
import numpy as np
from transformers import PretrainedConfig

import numpy as np
from packaging import version
import openvino as ov
from openvino.runtime import opset13
from optimum.intel.utils.import_utils import _openvino_version, is_openvino_version
Expand Down
1 change: 0 additions & 1 deletion optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE
from ...exporters.openvino import patch_stateful, raise_if_openvino_is_too_old


if is_transformers_version("<", "4.25.0"):
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ def test_stateful(self, model_arch):
def test_stateful_on_converted_model(self):
model_id = "vuiseng9/ov-gpt2-fp32-kv-cache"
# reference without state
loaded_model = OVModelForCausalLM.from_pretrained(model_id)
loaded_model = OVModelForCausalLM.from_pretrained(model_id, stateful=False)
self.assertIsInstance(loaded_model.config, PretrainedConfig)
self.assertFalse(loaded_model.stateful)
self.assertTrue(loaded_model.use_cache)
Expand Down

0 comments on commit 70d086a

Please sign in to comment.