diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index ee1f62388f..54d4878197 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -189,14 +189,6 @@ def parse_args_openvino(parser: "ArgumentParser"): action="store_true", help="Do not add converted tokenizer and detokenizer OpenVINO models.", ) - # TODO : deprecated - optional_group.add_argument("--fp16", action="store_true", help="Compress weights to fp16") - optional_group.add_argument("--int8", action="store_true", help="Compress weights to int8") - optional_group.add_argument( - "--convert-tokenizer", - action="store_true", - help="[Deprecated] Add converted tokenizer and detokenizer with OpenVINO Tokenizers.", - ) class OVExportCommand(BaseOptimumCLICommand): @@ -243,17 +235,6 @@ def _get_default_int4_config(model_id_or_path, library_name): ) library_name = "transformers" - if self.args.fp16: - logger.warning( - "`--fp16` option is deprecated and will be removed in a future version. Use `--weight-format` instead." - ) - self.args.weight_format = "fp16" - if self.args.int8: - logger.warning( - "`--int8` option is deprecated and will be removed in a future version. Use `--weight-format` instead." - ) - self.args.weight_format = "int8" - if self.args.weight_format is None: ov_config = None elif self.args.weight_format in {"fp16", "fp32"}: @@ -296,9 +277,6 @@ def _get_default_int4_config(model_id_or_path, library_name): quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64 ov_config = OVConfig(quantization_config=quantization_config) - if self.args.convert_tokenizer: - logger.warning("`--convert-tokenizer` option is deprecated. 
Tokenizer will be converted by default.") - quantization_config = ov_config.quantization_config if ov_config else None quantize_with_dataset = quantization_config and getattr(quantization_config, "dataset", None) is not None task = infer_task(self.args.task, self.args.model) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 8568f4d4df..a5171e6ae7 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -193,11 +193,6 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No signature = inspect.signature(self.model.forward) self._signature_columns = list(signature.parameters.keys()) - @property - def input_names(self): - logger.warning("The`input_names` attribute is deprecated and will be removed in v1.18.0") - return None - @classmethod def from_pretrained(cls, model: PreTrainedModel, **kwargs): # TODO : Create model @@ -212,7 +207,6 @@ def quantize( batch_size: int = 1, data_collator: Optional[DataCollator] = None, remove_unused_columns: bool = True, - weights_only: bool = None, **kwargs, ): """ @@ -235,10 +229,6 @@ def quantize( The function to use to form a batch from a list of elements of the calibration dataset. remove_unused_columns (`bool`, defaults to `True`): Whether to remove the columns unused by the model forward method. - weights_only (`bool`, *optional*): - Being deprecated. - Compress weights to integer precision (8-bit by default) while keeping activations - floating-point. Fits best for LLM footprint reduction and performance acceleration. Examples: ```python @@ -263,32 +253,20 @@ def quantize( >>> optimized_model = OVModelForSequenceClassification.from_pretrained("./quantized_model") ``` """ - # TODO: deprecate weights_only argument - if weights_only is not None: - logger.warning( - "`weights_only` argument is deprecated and will be removed in v1.18.0. 
In the future please provide `ov_config.quantization_config` " - "as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization." - ) - if ov_config is None: ov_config = OVConfig() if not isinstance(ov_config, OVConfig): raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.") quantization_config = ov_config.quantization_config if quantization_config is None: - if (weights_only is None or weights_only is True) and calibration_dataset is None: - if weights_only is None: - logger.info( - "`quantization_config` was not provided, 8-bit asymmetric weight quantization will be applied." - ) - ov_config.quantization_config = OVWeightQuantizationConfig(bits=8) - else: - logger.warning( - "`quantization_config` was not provided, but calibration dataset was provided, assuming full " - "model quantization is intended. In the future, please provide `quantization_config` as an " - "instance of OVQuantizationConfig." - ) - ov_config.quantization_config = OVQuantizationConfig() + logger.warning( + "`quantization_config` was not provided. In the future, please provide `quantization_config`" + ) + ov_config.quantization_config = ( + OVWeightQuantizationConfig(bits=8) + if calibration_dataset is None + else OVQuantizationConfig() + ) if isinstance(self.model, OVBaseModel): self._quantize_ovbasemodel(