From fa1bc56f151f5e50f19a0b856eba83cd822ce7be Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 30 Apr 2024 15:12:25 +0200 Subject: [PATCH 1/7] Proper datasets.Dataset importing --- optimum/intel/openvino/quantization.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 86e473fd1c..d4889c561a 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -21,7 +21,6 @@ from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union -import datasets import nncf import openvino import torch @@ -62,6 +61,8 @@ if is_datasets_available(): from datasets import Dataset +else: + Dataset = None register_module(ignored_algorithms=[])(Conv1D) @@ -318,7 +319,7 @@ def _quantize_ovbasemodel( self, ov_config: OVConfig, save_directory: Union[str, Path] = None, - calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None, + calibration_dataset: Optional[Union["Dataset", nncf.Dataset, Iterable]] = None, batch_size: int = 1, data_collator: Optional[DataCollator] = None, remove_unused_columns: bool = True, @@ -358,7 +359,7 @@ def _quantize_ovbasemodel( if isinstance(calibration_dataset, nncf.Dataset): quantization_dataset = calibration_dataset - elif isinstance(calibration_dataset, datasets.Dataset): + elif Dataset is not None and isinstance(calibration_dataset, Dataset): calibration_dataloader = self._get_calibration_dataloader( calibration_dataset=calibration_dataset, batch_size=batch_size, @@ -411,7 +412,7 @@ def _quantize_torchmodel( self, ov_config: OVConfig, save_directory: Union[str, Path], - calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None, + calibration_dataset: Optional[Union["Dataset", nncf.Dataset, Iterable]] = None, file_name: Optional[str] = None, batch_size: int = 1, data_collator: Optional[DataCollator] = None, @@ -482,7 +483,7 @@ def _quantize_torchmodel( if isinstance(calibration_dataset, nncf.Dataset): quantization_dataset = calibration_dataset - elif isinstance(calibration_dataset, datasets.Dataset): + elif isinstance(calibration_dataset, Dataset): calibration_dataloader = self._get_calibration_dataloader( calibration_dataset=calibration_dataset, batch_size=batch_size, @@ -567,7 +568,7 @@ def get_calibration_dataset( use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, cache_dir: str = HUGGINGFACE_HUB_CACHE, - ) -> datasets.Dataset: + ) -> "Dataset": """ Create the calibration `datasets.Dataset` to use for the post-training static quantization calibration step. @@ -671,7 +672,7 @@ def _weight_only_quantization( ) dataset = None if calibration_dataset is not None: - if isinstance(calibration_dataset, datasets.Dataset): + if Dataset is not None and isinstance(calibration_dataset, Dataset): raise ValueError( "Providing calibration dataset as an instance of `datasets.Dataset` for OV weight-only " "quantization is not supported. Please provide it as `nncf.Dataset` or as iterable of " From ca30de156918069eb1af2d13bd2545a7f2b5a851 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 10:32:19 +0200 Subject: [PATCH 2/7] SD calibration dataset collection refactoring --- optimum/intel/openvino/configuration.py | 1 + optimum/intel/openvino/modeling_diffusion.py | 82 ++------ optimum/intel/openvino/quantization.py | 210 ++++++++++++++----- tests/openvino/test_quantization.py | 4 +- 4 files changed, 174 insertions(+), 123 deletions(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 5de672b702..30dfe5ae6f 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -57,6 +57,7 @@ class OVQuantizationMethod(str, Enum): DEFAULT = "default" + HYBRID = "hybrid" @dataclass diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 2de7cb8154..ae86ea2bfe 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import copy import importlib import logging import os @@ -57,7 +57,7 @@ ) from ...exporters.openvino import main_export -from .configuration import OVConfig, OVWeightQuantizationConfig +from .configuration import OVConfig, OVWeightQuantizationConfig, OVQuantizationMethod from .loaders import OVTextualInversionLoaderMixin from .modeling_base import OVBaseModel from .utils import ( @@ -300,13 +300,11 @@ def _from_pretrained( # load the UNet model uncompressed to apply hybrid quantization further unet = cls.load_model(unet_path) # Apply weights compression to other `components` without dataset - weight_quantization_params = { - param: value for param, value in quantization_config.__dict__.items() if param != "dataset" - } - weight_quantization_config = OVWeightQuantizationConfig.from_dict(weight_quantization_params) + quantization_config_without_dataset = copy.deepcopy(quantization_config) + quantization_config_without_dataset.dataset = None else: - weight_quantization_config = quantization_config - unet = cls.load_model(unet_path, weight_quantization_config) + quantization_config_without_dataset = quantization_config + unet = cls.load_model(unet_path, quantization_config_without_dataset) components = { "vae_encoder": new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, @@ -316,7 +314,7 @@ def _from_pretrained( } for key, value in components.items(): - components[key] = cls.load_model(value, weight_quantization_config) if value.is_file() else None + components[key] = cls.load_model(value, quantization_config_without_dataset) if value.is_file() else None if model_save_dir is None: model_save_dir = new_model_save_dir @@ -332,12 +330,14 @@ def _from_pretrained( if not isinstance(sd_model, supported_pipelines): raise NotImplementedError(f"Quantization in hybrid mode is not supported for {cls.__name__}") - nsamples = quantization_config.num_samples if quantization_config.num_samples else 200 - unet_inputs = sd_model._prepare_unet_inputs(quantization_config.dataset, nsamples) + from optimum.intel import OVQuantizer - from .quantization import _hybrid_quantization + quantizer = OVQuantizer(sd_model) + quantization_config_copy = copy.deepcopy(quantization_config) + quantization_config_copy.quant_method = OVQuantizationMethod.HYBRID + quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy)) - unet = _hybrid_quantization(sd_model.unet.model, weight_quantization_config, dataset=unet_inputs) + return sd_model return cls( unet=unet, @@ -348,62 +348,6 @@ def _from_pretrained( **kwargs, ) - def _prepare_unet_inputs( - self, - dataset: Union[str, List[Any]], - num_samples: int, - height: Optional[int] = None, - width: Optional[int] = None, - seed: Optional[int] = 42, - **kwargs, - ) -> Dict[str, Any]: - self.compile() - - size = self.unet.config.get("sample_size", 64) * self.vae_scale_factor - height = height or min(size, 512) - width = width or min(size, 512) - - if isinstance(dataset, str): - dataset = deepcopy(dataset) - available_datasets = PREDEFINED_SD_DATASETS.keys() - if dataset not in available_datasets: - raise ValueError( - f"""You have entered a string value for dataset. You can only choose between - {list(available_datasets)}, but the {dataset} was found""" - ) - - from datasets import load_dataset - - dataset_metadata = PREDEFINED_SD_DATASETS[dataset] - dataset = load_dataset(dataset, split=dataset_metadata["split"], streaming=True).shuffle(seed=seed) - input_names = dataset_metadata["inputs"] - dataset = dataset.select_columns(list(input_names.values())) - - def transform_fn(data_item): - return {inp_name: data_item[column] for inp_name, column in input_names.items()} - - else: - - def transform_fn(data_item): - return data_item if isinstance(data_item, (list, dict)) else [data_item] - - from .quantization import InferRequestWrapper - - calibration_data = [] - self.unet.request = InferRequestWrapper(self.unet.request, calibration_data) - - for inputs in dataset: - inputs = transform_fn(inputs) - if isinstance(inputs, dict): - self.__call__(**inputs, height=height, width=width) - else: - self.__call__(*inputs, height=height, width=width) - if len(calibration_data) >= num_samples: - break - - self.unet.request = self.unet.request.request - return calibration_data[:num_samples] - @classmethod def _from_transformers( cls, diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index d4889c561a..f2258864ae 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections.abc import copy import inspect import logging @@ -49,13 +50,14 @@ from ..utils.constant import _TASK_ALIASES from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available from ..utils.modeling_utils import get_model_device -from .configuration import OVConfig, OVQuantizationConfig, OVWeightQuantizationConfig +from .configuration import OVConfig, OVQuantizationConfig, OVWeightQuantizationConfig, OVQuantizationMethod from .modeling_base import OVBaseModel from .utils import ( MAX_ONNX_OPSET, MIN_ONNX_QDQ_OPSET, ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, + PREDEFINED_SD_DATASETS, ) @@ -201,7 +203,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs): def quantize( self, - calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None, + calibration_dataset: Optional[Union["Dataset", nncf.Dataset, Iterable]] = None, save_directory: Optional[Union[str, Path]] = None, ov_config: OVConfig = None, file_name: Optional[str] = None, @@ -325,74 +327,84 @@ def _quantize_ovbasemodel( remove_unused_columns: bool = True, **kwargs, ): + from optimum.intel.openvino.modeling_diffusion import OVStableDiffusionPipelineBase + if save_directory is not None: save_directory = Path(save_directory) save_directory.mkdir(parents=True, exist_ok=True) - quantization_config = ov_config.quantization_config + + if calibration_dataset is not None: + # Process custom calibration dataset + + if isinstance(self.model, OVStableDiffusionPipelineBase): + calibration_dataset = self._prepare_unet_dataset( + quantization_config.num_samples, + dataset=calibration_dataset) + elif Dataset is not None and isinstance(calibration_dataset, Dataset): + calibration_dataloader = self._get_calibration_dataloader( + calibration_dataset=calibration_dataset, + batch_size=batch_size, + remove_unused_columns=remove_unused_columns, + data_collator=data_collator, + ) + + if self.model.export_feature == "text-generation" and self.model.use_cache: + calibration_dataset = self._prepare_text_generation_dataset( + quantization_config, calibration_dataloader) + else: + calibration_dataset = nncf.Dataset(calibration_dataloader) + elif isinstance(calibration_dataset, collections.abc.Iterable): + calibration_dataset = nncf.Dataset(calibration_dataset) + elif not isinstance(calibration_dataset, nncf.Dataset): + raise ValueError("`calibration_dataset` must be either an `Iterable` object or an instance of " + f"`nncf.Dataset` or `datasets.Dataset`. Found: {type(calibration_dataset)}.") + if isinstance(quantization_config, OVWeightQuantizationConfig): + if quantization_config.dataset is not None and calibration_dataset is not None: + logger.info( + "Both `quantization_config.dataset` and `calibration_dataset` were provided for weight only " + "quantization. Will rely on `calibration_dataset`." + ) + if calibration_dataset is None and isinstance(quantization_config.dataset, str): from optimum.intel import OVModelForCausalLM if isinstance(self.model, OVModelForCausalLM): - from optimum.gptq.data import get_dataset, prepare_dataset - - tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer) - nsamples = quantization_config.num_samples if quantization_config.num_samples else 128 - calibration_dataset = get_dataset( - quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples - ) - calibration_dataset = prepare_dataset(calibration_dataset) - calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x)) + calibration_dataset = self._prepare_gptq_dataset(quantization_config) + elif isinstance(self.model, OVStableDiffusionPipelineBase): + calibration_dataset = self._prepare_unet_dataset( + quantization_config.num_samples, + dataset_name=quantization_config.dataset) else: raise ValueError( f"Can't create weight compression calibration dataset from string for {type(self.model)}" ) - _weight_only_quantization(self.model.model, quantization_config, calibration_dataset) + if quantization_config.quant_method == OVQuantizationMethod.HYBRID: + if calibration_dataset is None: + raise ValueError("Calibration dataset is required to run hybrid quantization.") + if isinstance(self.model, OVStableDiffusionPipelineBase): + self.model.unet.model = _hybrid_quantization(self.model.unet.model, quantization_config, calibration_dataset) + else: + self.model.model = _hybrid_quantization(self.model.model, quantization_config, calibration_dataset) + else: + _weight_only_quantization(self.model.model, quantization_config, calibration_dataset) if save_directory is not None: self.model.save_pretrained(save_directory) ov_config.save_pretrained(save_directory) return + if not isinstance(quantization_config, OVQuantizationConfig): raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}") - if isinstance(calibration_dataset, nncf.Dataset): - quantization_dataset = calibration_dataset - elif Dataset is not None and isinstance(calibration_dataset, Dataset): - calibration_dataloader = self._get_calibration_dataloader( - calibration_dataset=calibration_dataset, - batch_size=batch_size, - remove_unused_columns=remove_unused_columns, - data_collator=data_collator, - ) - - if self.model.export_feature == "text-generation" and self.model.use_cache: - # Prefetch past_key_values - self.model.update_pkv_precision(True) - self.model.compile() - collected_inputs = [] - - self.model.request = InferRequestWrapper(self.model.request, collected_inputs) - try: - for data in calibration_dataloader: - self.model.generate(**data, max_new_tokens=1) - if len(collected_inputs) >= quantization_config.num_samples: - break - finally: - self.model.request = self.model.request.request - quantization_dataset = nncf.Dataset(collected_inputs) - else: - quantization_dataset = nncf.Dataset(calibration_dataloader) - else: - if calibration_dataset is None: - raise ValueError("Calibration dataset is required to run quantization.") - quantization_dataset = nncf.Dataset(calibration_dataset) + if calibration_dataset is None: + raise ValueError("Calibration dataset is required to run quantization.") # Actual model quantization quantized_model = nncf.quantize( self.model.model, - quantization_dataset, + calibration_dataset, subset_size=quantization_config.num_samples, ignored_scope=quantization_config.get_ignored_scope_instance(), model_type=nncf.ModelType(quantization_config.model_type), @@ -655,6 +667,103 @@ def _remove_unused_columns(self, dataset: "Dataset"): ignored_columns = list(set(dataset.column_names) - set(self._signature_columns)) return dataset.remove_columns(ignored_columns) + def _prepare_gptq_dataset(self, quantization_config: OVWeightQuantizationConfig): + from optimum.gptq.data import get_dataset, prepare_dataset + + tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer) + nsamples = quantization_config.num_samples if quantization_config.num_samples else 128 + calibration_dataset = get_dataset( + quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples + ) + calibration_dataset = prepare_dataset(calibration_dataset) + calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x)) + + return calibration_dataset + + def _prepare_text_generation_dataset( + self, + quantization_config: OVQuantizationConfig, + calibration_dataloader: OVDataLoader) -> nncf.Dataset: + # TODO: this function is not covered by tests, remove if not relevant anymore or cover by tests otherwise + + # Prefetch past_key_values + self.model.update_pkv_precision(True) + self.model.compile() + collected_inputs = [] + + num_samples = quantization_config.num_samples or 200 + + self.model.request = InferRequestWrapper(self.model.model.request, collected_inputs) + try: + for data in calibration_dataloader: + self.model.generate(**data, max_new_tokens=1) + if len(collected_inputs) >= num_samples: + break + finally: + self.model.model.request = self.model.model.request.request + calibration_dataset = nncf.Dataset(collected_inputs) + return calibration_dataset + + def _prepare_unet_dataset( + self, + num_samples: Optional[int] = None, + dataset_name: Optional[str] = None, + dataset: Optional[Union[Iterable, "Dataset"]] = None) -> nncf.Dataset: + self.model.compile() + + size = self.model.unet.config.get("sample_size", 64) * self.model.vae_scale_factor + height, width = 2 * (min(size, 512),) + num_samples = num_samples or 200 + + if dataset is not None: + if isinstance(dataset, nncf.Dataset): + return dataset + if Dataset is not None and isinstance(dataset, Dataset): + dataset = dataset.select_columns(["caption"]) + + def transform_fn(data_item): + return data_item if isinstance(data_item, (list, dict)) else [data_item] + + elif isinstance(dataset_name, str): + available_datasets = PREDEFINED_SD_DATASETS.keys() + if dataset_name not in available_datasets: + raise ValueError( + f"""You have entered a string value for dataset. You can only choose between + {list(available_datasets)}, but the {dataset_name} was found""" + ) + + from datasets import load_dataset + + dataset_metadata = PREDEFINED_SD_DATASETS[dataset_name] + dataset = load_dataset(dataset_name, split=dataset_metadata["split"], streaming=True).shuffle(seed=self.seed) + input_names = dataset_metadata["inputs"] + dataset = dataset.select_columns(list(input_names.values())) + + def transform_fn(data_item): + return {inp_name: data_item[column] for inp_name, column in input_names.items()} + + else: + raise ValueError("For UNet inputs collection either quantization_config.dataset or custom " + "calibration_dataset must be provided.") + + calibration_data = [] + try: + self.model.unet.request = InferRequestWrapper(self.model.unet.request, calibration_data) + + for inputs in dataset: + inputs = transform_fn(inputs) + if isinstance(inputs, dict): + self.model(**inputs, height=height, width=width) + else: + self.model(*inputs, height=height, width=width) + if len(calibration_data) >= num_samples: + break + finally: + self.model.unet.request = self.model.unet.request.request + + calibration_dataset = nncf.Dataset(calibration_data[:num_samples]) + return calibration_dataset + def _weight_only_quantization( model: openvino.runtime.Model, @@ -665,11 +774,6 @@ def _weight_only_quantization( if isinstance(config, dict): config = OVWeightQuantizationConfig.from_dict(quantization_config) - if config.dataset is not None and calibration_dataset is not None: - logger.info( - "Both `quantization_config.dataset` and `calibration_dataset` were provided for weight only " - "quantization. Will rely on `calibration_dataset`." - ) dataset = None if calibration_dataset is not None: if Dataset is not None and isinstance(calibration_dataset, Dataset): @@ -752,7 +856,7 @@ def _collect_ops_with_weights(model): def _hybrid_quantization( - model: openvino.runtime.Model, quantization_config: OVWeightQuantizationConfig, dataset: Dict[str, Any] + model: openvino.runtime.Model, quantization_config: OVWeightQuantizationConfig, dataset: nncf.Dataset ) -> openvino.runtime.Model: """ Quantize a model in hybrid mode with NNCF which means that we quantize: @@ -764,7 +868,7 @@ def _hybrid_quantization( The OpenVINO Runtime model for applying hybrid quantization. quantization_config (`OVWeightQuantizationConfig`): The configuration containing the parameters related to quantization. - dataset (`Dict[str, Any]`): + dataset (`nncf.Dataset`): The dataset used for hybrid quantization. Returns: The OpenVINO Runtime model with applied hybrid quantization. @@ -781,7 +885,7 @@ def _hybrid_quantization( subset_size = quantization_config.num_samples if quantization_config.num_samples else 200 quantized_model = nncf.quantize( model=compressed_model, - calibration_dataset=nncf.Dataset(dataset), + calibration_dataset=dataset, model_type=nncf.ModelType.TRANSFORMER, ignored_scope=ptq_ignored_scope, # SQ algo should be disabled for MatMul nodes because their weights are already compressed diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 896f37d767..de6b80827d 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -413,8 +413,10 @@ def test_ovmodel_hybrid_quantization_with_custom_dataset( model = model_cls.from_pretrained( model_id, export=True, - quantization_config=OVWeightQuantizationConfig(bits=8, dataset=dataset, num_samples=3), ) + quantizer = OVQuantizer(model) + quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=3, quant_method=OVQuantizationMethod.HYBRID) + quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config), calibration_dataset=dataset) num_fake_quantize, num_int8, num_int4 = get_num_quantized_nodes(model.unet) self.assertEqual(expected_num_fake_quantize, num_fake_quantize) self.assertEqual(expected_ov_int8, num_int8) From de9b5c18c0b508422361a6562cbdd90b144aa776 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 10:39:36 +0200 Subject: [PATCH 3/7] linters --- optimum/intel/openvino/modeling_diffusion.py | 9 ++-- optimum/intel/openvino/quantization.py | 51 +++++++++++--------- tests/openvino/test_quantization.py | 10 ++-- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index ae86ea2bfe..c5afb2c14b 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import copy + import importlib import logging import os @@ -57,14 +57,13 @@ ) from ...exporters.openvino import main_export -from .configuration import OVConfig, OVWeightQuantizationConfig, OVQuantizationMethod +from .configuration import OVConfig, OVQuantizationMethod, OVWeightQuantizationConfig from .loaders import OVTextualInversionLoaderMixin from .modeling_base import OVBaseModel from .utils import ( ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME, - PREDEFINED_SD_DATASETS, _print_compiled_model_properties, ) @@ -300,7 +299,7 @@ def _from_pretrained( # load the UNet model uncompressed to apply hybrid quantization further unet = cls.load_model(unet_path) # Apply weights compression to other `components` without dataset - quantization_config_without_dataset = copy.deepcopy(quantization_config) + quantization_config_without_dataset = deepcopy(quantization_config) quantization_config_without_dataset.dataset = None else: quantization_config_without_dataset = quantization_config @@ -333,7 +332,7 @@ def _from_pretrained( from optimum.intel import OVQuantizer quantizer = OVQuantizer(sd_model) - quantization_config_copy = copy.deepcopy(quantization_config) + quantization_config_copy = deepcopy(quantization_config) quantization_config_copy.quant_method = OVQuantizationMethod.HYBRID quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy)) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index f2258864ae..a749f38e66 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -50,7 +50,7 @@ from ..utils.constant import _TASK_ALIASES from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available from ..utils.modeling_utils import get_model_device -from .configuration import OVConfig, OVQuantizationConfig, OVWeightQuantizationConfig, OVQuantizationMethod +from .configuration import OVConfig, OVQuantizationConfig, OVQuantizationMethod, OVWeightQuantizationConfig from .modeling_base import OVBaseModel from .utils import ( MAX_ONNX_OPSET, @@ -339,8 +339,8 @@ def _quantize_ovbasemodel( if isinstance(self.model, OVStableDiffusionPipelineBase): calibration_dataset = self._prepare_unet_dataset( - quantization_config.num_samples, - dataset=calibration_dataset) + quantization_config.num_samples, dataset=calibration_dataset + ) elif Dataset is not None and isinstance(calibration_dataset, Dataset): calibration_dataloader = self._get_calibration_dataloader( calibration_dataset=calibration_dataset, @@ -351,14 +351,17 @@ def _quantize_ovbasemodel( if self.model.export_feature == "text-generation" and self.model.use_cache: calibration_dataset = self._prepare_text_generation_dataset( - quantization_config, calibration_dataloader) + quantization_config, calibration_dataloader + ) else: calibration_dataset = nncf.Dataset(calibration_dataloader) elif isinstance(calibration_dataset, collections.abc.Iterable): calibration_dataset = nncf.Dataset(calibration_dataset) elif not isinstance(calibration_dataset, nncf.Dataset): - raise ValueError("`calibration_dataset` must be either an `Iterable` object or an instance of " - f"`nncf.Dataset` or `datasets.Dataset`. Found: {type(calibration_dataset)}.") + raise ValueError( + "`calibration_dataset` must be either an `Iterable` object or an instance of " + f"`nncf.Dataset` or `datasets.Dataset`. Found: {type(calibration_dataset)}." + ) if isinstance(quantization_config, OVWeightQuantizationConfig): if quantization_config.dataset is not None and calibration_dataset is not None: @@ -374,8 +377,8 @@ def _quantize_ovbasemodel( calibration_dataset = self._prepare_gptq_dataset(quantization_config) elif isinstance(self.model, OVStableDiffusionPipelineBase): calibration_dataset = self._prepare_unet_dataset( - quantization_config.num_samples, - dataset_name=quantization_config.dataset) + quantization_config.num_samples, dataset_name=quantization_config.dataset + ) else: raise ValueError( f"Can't create weight compression calibration dataset from string for {type(self.model)}" @@ -385,7 +388,9 @@ def _quantize_ovbasemodel( if calibration_dataset is None: raise ValueError("Calibration dataset is required to run hybrid quantization.") if isinstance(self.model, OVStableDiffusionPipelineBase): - self.model.unet.model = _hybrid_quantization(self.model.unet.model, quantization_config, calibration_dataset) + self.model.unet.model = _hybrid_quantization( + self.model.unet.model, quantization_config, calibration_dataset + ) else: self.model.model = _hybrid_quantization(self.model.model, quantization_config, calibration_dataset) else: @@ -672,18 +677,15 @@ def _prepare_gptq_dataset(self, quantization_config: OVWeightQuantizationConfig) tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer) nsamples = quantization_config.num_samples if quantization_config.num_samples else 128 - calibration_dataset = get_dataset( - quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples - ) + calibration_dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples) calibration_dataset = prepare_dataset(calibration_dataset) calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x)) return calibration_dataset def _prepare_text_generation_dataset( - self, - quantization_config: OVQuantizationConfig, - calibration_dataloader: OVDataLoader) -> nncf.Dataset: + self, quantization_config: OVQuantizationConfig, calibration_dataloader: OVDataLoader + ) -> nncf.Dataset: # TODO: this function is not covered by tests, remove if not relevant anymore or cover by tests otherwise # Prefetch past_key_values @@ -705,10 +707,11 @@ def _prepare_text_generation_dataset( return calibration_dataset def _prepare_unet_dataset( - self, - num_samples: Optional[int] = None, - dataset_name: Optional[str] = None, - dataset: Optional[Union[Iterable, "Dataset"]] = None) -> nncf.Dataset: + self, + num_samples: Optional[int] = None, + dataset_name: Optional[str] = None, + dataset: Optional[Union[Iterable, "Dataset"]] = None, + ) -> nncf.Dataset: self.model.compile() size = self.model.unet.config.get("sample_size", 64) * self.model.vae_scale_factor @@ -735,7 +738,9 @@ def transform_fn(data_item): from datasets import load_dataset dataset_metadata = PREDEFINED_SD_DATASETS[dataset_name] - dataset = load_dataset(dataset_name, split=dataset_metadata["split"], streaming=True).shuffle(seed=self.seed) + dataset = load_dataset(dataset_name, split=dataset_metadata["split"], streaming=True).shuffle( + seed=self.seed + ) input_names = dataset_metadata["inputs"] dataset = dataset.select_columns(list(input_names.values())) @@ -743,8 +748,10 @@ def transform_fn(data_item): return {inp_name: data_item[column] for inp_name, column in input_names.items()} else: - raise ValueError("For UNet inputs collection either quantization_config.dataset or custom " - "calibration_dataset must be provided.") + raise ValueError( + "For UNet inputs collection either quantization_config.dataset or custom " + "calibration_dataset must be provided." + ) calibration_data = [] try: diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index de6b80827d..98eb121d72 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -21,21 +21,17 @@ from collections import defaultdict from enum import Enum from functools import partial -from typing import List, Union +from typing import Union import evaluate import numpy as np import torch from datasets import load_dataset -from nncf.quantization.advanced_parameters import OverflowFix from parameterized import parameterized -import openvino.runtime as ov import nncf from transformers import ( AutoModelForQuestionAnswering, AutoModelForSequenceClassification, - AutoModelForCausalLM, - AutoModelForTokenClassification, AutoTokenizer, AutoProcessor, TrainingArguments, @@ -415,7 +411,9 @@ def test_ovmodel_hybrid_quantization_with_custom_dataset( export=True, ) quantizer = OVQuantizer(model) - quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=3, quant_method=OVQuantizationMethod.HYBRID) + quantization_config = OVWeightQuantizationConfig( + bits=8, num_samples=3, quant_method=OVQuantizationMethod.HYBRID + ) quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config), calibration_dataset=dataset) num_fake_quantize, num_int8, num_int4 = get_num_quantized_nodes(model.unet) self.assertEqual(expected_num_fake_quantize, num_fake_quantize) From 4a007f5adad1b2bbbf7dd0587b6f3b0280032b71 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 13:45:27 +0200 Subject: [PATCH 4/7] Addressed comments --- optimum/intel/openvino/quantization.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index a749f38e66..d1f28b2905 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -341,7 +341,7 @@ def _quantize_ovbasemodel( calibration_dataset = self._prepare_unet_dataset( quantization_config.num_samples, dataset=calibration_dataset ) - elif Dataset is not None and isinstance(calibration_dataset, Dataset): + elif is_datasets_available() and isinstance(calibration_dataset, Dataset): calibration_dataloader = self._get_calibration_dataloader( calibration_dataset=calibration_dataset, batch_size=batch_size, @@ -374,7 +374,7 @@ def _quantize_ovbasemodel( from optimum.intel import OVModelForCausalLM if isinstance(self.model, OVModelForCausalLM): - calibration_dataset = self._prepare_gptq_dataset(quantization_config) + calibration_dataset = self._prepare_builtin_dataset(quantization_config) elif isinstance(self.model, OVStableDiffusionPipelineBase): calibration_dataset = self._prepare_unet_dataset( quantization_config.num_samples, dataset_name=quantization_config.dataset @@ -392,6 +392,7 @@ def _quantize_ovbasemodel( self.model.unet.model, quantization_config, calibration_dataset ) else: + # This may be for example OVModelForImageClassification, OVModelForAudioClassification, etc. self.model.model = _hybrid_quantization(self.model.model, quantization_config, calibration_dataset) else: _weight_only_quantization(self.model.model, quantization_config, calibration_dataset) @@ -672,7 +673,7 @@ def _remove_unused_columns(self, dataset: "Dataset"): ignored_columns = list(set(dataset.column_names) - set(self._signature_columns)) return dataset.remove_columns(ignored_columns) - def _prepare_gptq_dataset(self, quantization_config: OVWeightQuantizationConfig): + def _prepare_builtin_dataset(self, quantization_config: OVWeightQuantizationConfig): from optimum.gptq.data import get_dataset, prepare_dataset tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer) @@ -721,7 +722,7 @@ def _prepare_unet_dataset( if dataset is not None: if isinstance(dataset, nncf.Dataset): return dataset - if Dataset is not None and isinstance(dataset, Dataset): + if is_datasets_available() and isinstance(dataset, Dataset): dataset = dataset.select_columns(["caption"]) def transform_fn(data_item): @@ -783,7 +784,7 @@ def _weight_only_quantization( dataset = None if calibration_dataset is not None: - if Dataset is not None and isinstance(calibration_dataset, Dataset): + if is_datasets_available() and isinstance(calibration_dataset, Dataset): raise ValueError( "Providing calibration dataset as an instance of `datasets.Dataset` for OV weight-only " "quantization is not supported. Please provide it as `nncf.Dataset` or as iterable of " From 583e43514ba0721fd7dfd87a75fe8f627f4fef58 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 13:46:46 +0200 Subject: [PATCH 5/7] Updated SD HQ notebook --- .../stable_diffusion_hybrid_quantization.ipynb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb b/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb index 41969b162a..efe413a9ec 100644 --- a/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb +++ b/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb @@ -52,7 +52,7 @@ "import transformers\n", "from pathlib import Path\n", "from openvino.runtime import Core\n", - "from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig\n", + "from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n", "\n", "transformers.logging.set_verbosity_error()\n", "datasets.logging.set_verbosity_error()" @@ -198,9 +198,14 @@ }, "outputs": [], "source": [ - "quantization_config = OVWeightQuantizationConfig(bits=8, dataset=calibration_dataset, num_samples=NUM_SAMPLES)\n", - "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True, quantization_config=quantization_config)\n", - "int8_pipe.save_pretrained(int8_model_path)" + "quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES)\n", + "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n", + "quantizer = OVQuantizer(int8_pipe)\n", + "quantizer.quantize(\n", + " ov_config=OVConfig(quantization_config=quantization_config),\n", + " calibration_dataset=calibration_dataset,\n", + " save_directory=int8_model_path\n", + ")" ] }, { @@ -613,7 +618,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.8.10" } }, "nbformat": 4, From 349350c2c7524aa4be33c0baf680a1e45f894745 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 16:10:22 +0200 Subject: [PATCH 6/7] Quantize SD submodels in OVQuantizer --- ...stable_diffusion_hybrid_quantization.ipynb | 5 +-- optimum/intel/openvino/modeling_diffusion.py | 33 ++++++++----------- optimum/intel/openvino/quantization.py | 14 ++++++-- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb b/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb index efe413a9ec..142cde4923 100644 --- a/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb +++ b/notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb @@ -53,6 +53,7 @@ "from pathlib import Path\n", "from openvino.runtime import Core\n", "from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n", + "from optimum.intel.openvino.configuration import OVQuantizationMethod\n", "\n", "transformers.logging.set_verbosity_error()\n", "datasets.logging.set_verbosity_error()" @@ -198,8 +199,8 @@ }, "outputs": [], "source": [ - "quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES)\n", "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n", + "quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID)\n", "quantizer = OVQuantizer(int8_pipe)\n", "quantizer.quantize(\n", " ov_config=OVConfig(quantization_config=quantization_config),\n", @@ -618,7 +619,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index c5afb2c14b..c92d20e3ea 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -292,19 +292,7 @@ def _from_pretrained( else: kwargs[name] = load_method(new_model_save_dir) - quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit) - unet_path = new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name - if quantization_config is not None and quantization_config.dataset is not None: - # load the UNet model uncompressed to apply hybrid quantization further - unet = cls.load_model(unet_path) - # Apply weights compression to other `components` without dataset - quantization_config_without_dataset = deepcopy(quantization_config) - quantization_config_without_dataset.dataset = None - else: - quantization_config_without_dataset = quantization_config - unet = cls.load_model(unet_path, quantization_config_without_dataset) - components = { "vae_encoder": new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, "vae_decoder": new_model_save_dir / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name, @@ -312,13 +300,19 @@ def _from_pretrained( "text_encoder_2": new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name, } - for key, value in components.items(): - components[key] = cls.load_model(value, quantization_config_without_dataset) if value.is_file() else None - if model_save_dir is None: model_save_dir = new_model_save_dir - if quantization_config is not None and quantization_config.dataset is not None: + quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit) + if quantization_config is None or quantization_config.dataset is None: + unet = cls.load_model(unet_path, quantization_config) + for key, value in components.items(): + components[key] = cls.load_model(value, quantization_config) if value.is_file() else None + else: + # Load uncompressed models to apply hybrid quantization further + unet = cls.load_model(unet_path) + for key, value in components.items(): + components[key] = cls.load_model(value) if value.is_file() else None sd_model = cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs) supported_pipelines = ( @@ -331,10 +325,10 @@ def _from_pretrained( from optimum.intel import OVQuantizer + hybrid_quantization_config = deepcopy(quantization_config) + hybrid_quantization_config.quant_method = OVQuantizationMethod.HYBRID quantizer = OVQuantizer(sd_model) - quantization_config_copy = deepcopy(quantization_config) - quantization_config_copy.quant_method = OVQuantizationMethod.HYBRID - quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy)) + quantizer.quantize(ov_config=OVConfig(quantization_config=hybrid_quantization_config)) return sd_model @@ -347,6 +341,7 @@ def _from_pretrained( **kwargs, ) + @classmethod def _from_transformers( cls, diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index d1f28b2905..45961a86ff 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -63,8 +63,6 @@ if is_datasets_available(): from datasets import Dataset -else: - Dataset = None register_module(ignored_algorithms=[])(Conv1D) @@ -388,11 +386,21 @@ def _quantize_ovbasemodel( if calibration_dataset is None: raise ValueError("Calibration dataset is required to run hybrid quantization.") if isinstance(self.model, OVStableDiffusionPipelineBase): + # Apply weight-only quantization to all SD submodels except UNet + quantization_config_copy = copy.deepcopy(quantization_config) + quantization_config_copy.dataset = None + quantization_config_copy.quant_method = OVQuantizationMethod.DEFAULT + for sd_submodel_name in ["vae_encoder", "vae_decoder", "text_encoder", "text_encoder_2"]: + sd_submodel = getattr(self.model, sd_submodel_name) + if sd_submodel is not None: + _weight_only_quantization(sd_submodel.model, quantization_config_copy) + + # Apply hybrid quantization to UNet self.model.unet.model = _hybrid_quantization( self.model.unet.model, quantization_config, calibration_dataset ) else: - # This may be for example OVModelForImageClassification, OVModelForAudioClassification, etc. + # The model may be for example OVModelForImageClassification, OVModelForAudioClassification, etc. self.model.model = _hybrid_quantization(self.model.model, quantization_config, calibration_dataset) else: _weight_only_quantization(self.model.model, quantization_config, calibration_dataset) From 068236dcb585c8af01f4b76793a2aaed5e58ca0b Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 6 May 2024 17:13:33 +0200 Subject: [PATCH 7/7] Black --- optimum/intel/openvino/modeling_diffusion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index c92d20e3ea..1b880e736c 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -341,7 +341,6 @@ def _from_pretrained( **kwargs, ) - @classmethod def _from_transformers( cls,