Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip automodel compression weights tests for nncf==2.8.0 #535

Merged
merged 10 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 1 addition & 20 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,7 @@
from optimum.exporters.onnx.model_patcher import DecoderModelPatcher
from optimum.utils import is_diffusers_available

from ...intel.utils.import_utils import (
_torch_version,
_transformers_version,
is_nncf_available,
is_optimum_version,
is_torch_version,
is_transformers_version,
)
from ...intel.utils.import_utils import is_nncf_available, is_optimum_version
from .model_patcher import patch_model_with_bettertransformer
from .stateful import ensure_stateful_is_available, patch_stateful
from .utils import (
Expand Down Expand Up @@ -330,18 +323,6 @@ def export_pytorch(
output = Path(output)

if stateful:
if is_transformers_version("<", "4.36") or is_torch_version("<", "2.1.1"):
COLOR_RED = "\033[1;31m"
COLOR_RESET = "\033[0m"
logger.warning(
COLOR_RED
+ "[WARNING] For good performance with stateful models, transformers>=4.36.2 and PyTorch>=2.1.1 are required. "
f"This Python environment has Transformers {_transformers_version} and PyTorch {_torch_version}. "
"Consider upgrading PyTorch and Transformers, for example by running "
"`pip install --upgrade --upgrade-strategy eager optimum[openvino,nncf]`, and export the model again"
+ COLOR_RESET
)

# Trigger bettertransformer together with stateful model because OpenVINO HW-dependent transformations expect
# both of them to be applied to demonstrate the best performance.
# TODO: Consider applying bettertransformer regardless of stateful flag -- requires additional validation.
Expand Down
25 changes: 20 additions & 5 deletions optimum/exporters/openvino/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,31 @@

import logging as log

from optimum.intel.utils.import_utils import is_torch_version
from optimum.intel.utils.import_utils import (
_torch_version,
_transformers_version,
is_torch_version,
is_transformers_version,
)


def patch_model_with_bettertransformer(model):
if is_torch_version("<", "2.0"):
# check that the model has not yet been patched
if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True:
return model

if is_transformers_version("<", "4.36") or is_torch_version("<", "2.1.1"):
COLOR_RED = "\033[1;31m"
COLOR_RESET = "\033[0m"
log.warn(
"integration Scaled Dot Product Attention optimization supported only with torch > 2.0."
"Usage model with stateful=True may be non-effective if model does not contain torch.functional.scaled_dot_product_attention"
"It is recommended to upgrade PyTorch version for using stateful model or use stateful=False"
COLOR_RED
+ "[WARNING] For good performance with stateful models, transformers>=4.36.2 and PyTorch>=2.1.1 are required. "
f"This Python environment has Transformers {_transformers_version} and PyTorch {_torch_version}. "
"Consider upgrading PyTorch and Transformers, for example by running "
"`pip install --upgrade --upgrade-strategy eager optimum[openvino,nncf]`, and export the model again"
+ COLOR_RESET
)

# model already has required SDPA implementation
if getattr(model, "_supports_sdpa", False) and getattr(model.config, "_attn_implementation", "eager") == "sdpa":
return model
Expand Down
70 changes: 44 additions & 26 deletions optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,24 @@
import transformers
from accelerate.data_loader import DataLoaderStateMixin
from datasets import Dataset, load_dataset
from nncf import NNCFConfig, compress_weights
from nncf import NNCFConfig
from nncf.torch import create_compressed_model, register_default_init_args, register_module
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
from nncf.torch.initialization import PTInitializingDataLoader
from openvino._offline_transformations import compress_quantize_weights_transformation
from openvino.runtime import Core, Tensor
from torch.utils._pytree import tree_map
from torch.utils.data import DataLoader, RandomSampler
from transformers import DataCollator, PreTrainedModel, default_data_collator
from transformers.pytorch_utils import Conv1D

from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
from optimum.exporters.tasks import TasksManager
from optimum.quantization_base import OptimumQuantizer

from ...exporters.openvino import export, export_pytorch_via_onnx
from ...exporters.openvino.stateful import ensure_export_task_support_stateful
from ...exporters.openvino.model_patcher import patch_model_with_bettertransformer
from ...exporters.openvino.stateful import ensure_export_task_support_stateful, ensure_stateful_is_available
from ..utils.constant import _TASK_ALIASES
from .configuration import OVConfig
from .modeling_base import OVBaseModel
Expand Down Expand Up @@ -348,9 +351,7 @@ def _quantize_ovcausallm(
self.model.model,
quantization_dataset,
model_type=nncf.ModelType.TRANSFORMER if not kwargs.get("model_type") else kwargs.get("model_type"),
fast_bias_correction=True
if not kwargs.get("fast_bias_correction")
else kwargs.get("fast_bias_correction"),
fast_bias_correction=kwargs.get("fast_bias_correction", True),
**kwargs,
)
self.model.model = quantized_model
Expand Down Expand Up @@ -392,13 +393,42 @@ def _quantize_torchmodel(
if file_name is None and quantization_config.save_onnx_model
else Path(ov_file_name).with_suffix(".onnx")
)

task = self.task
model = self.model
self.model.config.save_pretrained(save_directory)
if task.startswith("text-generation"):
onnx_config = onnx_config_class(
model.config, use_past=model.config.use_cache, use_past_in_inputs=model.config.use_cache
)
if model.config.use_cache:
task = "text-generation-with-past"
else:
onnx_config = onnx_config_class(model.config)

stateful = ensure_stateful_is_available() and ensure_export_task_support_stateful(task)

if weights_only:
if getattr(self.model.config, "tie_word_embeddings", True):
# to fix problem with shared embedding weights in nncf compress_weights()
self.model.tie_weights()
compressed_model = compress_weights(self.model)
self.model = compressed_model
if stateful:
# patch model before weight compression
model = patch_model_with_bettertransformer(model)

dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
device = model.device
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved
dummy_inputs = tree_map(
lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
)
check_dummy_inputs_are_allowed(model, dummy_inputs)

nncf.compress_weights(model, dataset=nncf.Dataset([dummy_inputs]))
else:
if stateful:
logger.warn(
"Quantization algorithm does not support optimized stateful models. "
"The original model without optimization will be quantized and export."
)
stateful = False

calibration_dataloader = self._get_calibration_dataloader(
calibration_dataset=calibration_dataset,
batch_size=batch_size,
Expand All @@ -410,22 +440,10 @@ def _quantize_torchmodel(
quantization_config.add_input_info(model_inputs)
nncf_config = NNCFConfig.from_dict(quantization_config.__dict__)
nncf_config = register_default_init_args(nncf_config, calibration_dataloader)
controller, compressed_model = create_compressed_model(
self.model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk
controller, model = create_compressed_model(
model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk
)
compressed_model = controller.strip(do_copy=False)

task = self.task
model = self.model
self.model.config.save_pretrained(save_directory)
if task.startswith("text-generation"):
onnx_config = onnx_config_class(
model.config, use_past=model.config.use_cache, use_past_in_inputs=model.config.use_cache
)
if model.config.use_cache:
task = "text-generation-with-past"
else:
onnx_config = onnx_config_class(model.config)
model = controller.strip(do_copy=False)

model_path = save_directory / (onnx_file_name if quantization_config.save_onnx_model else ov_file_name)
onnx_path = save_directory / onnx_file_name
Expand All @@ -434,7 +452,7 @@ def _quantize_torchmodel(
opset = max(opset, MIN_ONNX_QDQ_OPSET)
kwargs = {}
if not quantization_config.save_onnx_model:
kwargs = {"stateful": ensure_export_task_support_stateful(task)}
kwargs = {"stateful": stateful}
_, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset, **kwargs)
if is_onnx:
# Load and save the compressed model
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"transformers>=4.34.0",
],
"openvino": ["openvino>=2023.2", "onnx", "onnxruntime", "transformers>=4.36.0", "optimum>=1.16.1"],
"nncf": ["nncf>=2.7.0"],
"nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
"ipex": ["intel-extension-for-pytorch", "onnx"],
"diffusers": ["diffusers"],
"quality": QUALITY_REQUIRE,
Expand Down
4 changes: 2 additions & 2 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@


class OVQuantizerTest(unittest.TestCase):
# TODO : add models
# TODO : add models, enable OVModelForCausalLM.
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
(OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 32, 35),
(OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 41, 23),
# (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 41, 23),
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
Expand Down
10 changes: 5 additions & 5 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,15 @@
"bert": (70,),
"roberta": (68,),
"albert": (84,),
"vit": (62,),
"vit": (64,),
"blenderbot": (70,),
"gpt2": (46,),
"wav2vec2": (30,),
"wav2vec2": (34,),
"distilbert": (66,),
"t5": (64, 104, 84),
"stable-diffusion": (148, 8, 8, 64),
"stable-diffusion-xl": (296, 8, 8, 66),
"stable-diffusion-xl-refiner": (296, 8, 8, 66),
"stable-diffusion": (242, 34, 42, 64),
"stable-diffusion-xl": (366, 34, 42, 66),
"stable-diffusion-xl-refiner": (366, 34, 42, 66),
}

_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)}
Expand Down
Loading