Update minimum itrex version (#675)
* Update minimum itrex version

* Trigger test

* trigger test

* trigger test

* fix

* remove tests itrex version constraint
echarlaix authored Apr 23, 2024
1 parent 00581ab commit a0dc06c
Showing 8 changed files with 47 additions and 162 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_inc.yml
@@ -33,7 +33,7 @@ jobs:
          pip install cmake
          pip install py-cpuinfo
          pip install .[neural-compressor,diffusers,tests]
-         pip install intel-extension-for-transformers==1.4.0
+         pip install intel-extension-for-transformers
          pip install peft
      - name: Test with Pytest
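
The CI job no longer pins `intel-extension-for-transformers` (ITREX) to 1.4.0; the floor is instead enforced at import time inside the library (see the `quantization.py` and `utils.py` changes below). A minimal sketch of that runtime guard, assuming only `importlib` and `packaging` — variable names are illustrative, not the exact library code:

```python
# Sketch: enforce a minimum version at import time instead of pinning in CI.
import importlib.util
import importlib.metadata as importlib_metadata

from packaging.version import parse

ITREX_MINIMUM_VERSION = "1.4.0"

_itrex_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
_itrex_version = "N/A"
if _itrex_available:
    try:
        _itrex_version = importlib_metadata.version("intel_extension_for_transformers")
    except importlib_metadata.PackageNotFoundError:
        _itrex_available = False

if _itrex_available and parse(_itrex_version) < parse(ITREX_MINIMUM_VERSION):
    raise ImportError(
        f"intel-extension-for-transformers {_itrex_version} found, "
        f"but {ITREX_MINIMUM_VERSION} or higher is required."
    )
```
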
11 changes: 4 additions & 7 deletions examples/neural_compressor/language-modeling/run_clm.py
@@ -57,13 +57,10 @@
 from transformers.utils.versions import require_version

 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
-from optimum.intel.utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    is_intel_extension_for_transformers_available,
-)
+from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


-if is_intel_extension_for_transformers_available():
+if is_itrex_available():
     from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig

 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
     else:
         recipes = {}
     if optim_args.quantization_approach == "weight_only":
-        if not is_intel_extension_for_transformers_available():
-            raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
+        if not is_itrex_available():
+            raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
         if optim_args.apply_pruning or optim_args.apply_distillation:
             raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
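
The example script gates weight-only quantization behind `is_itrex_available()` and raises `ITREX_IMPORT_ERROR` formatted with the feature name. A hedged sketch of the same gate factored into a helper — `require_itrex` is hypothetical, not part of the codebase:

```python
from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


def require_itrex(feature: str) -> None:
    # Raise a descriptive ImportError naming the feature that needs ITREX.
    if not is_itrex_available():
        raise ImportError(ITREX_IMPORT_ERROR.format(feature))


require_itrex("WeightOnly quantization")  # no-op when ITREX is installed
```
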
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from ..utils.import_utils import is_diffusers_available, is_intel_extension_for_transformers_available
+from ..utils.import_utils import is_diffusers_available
 from .configuration import INCConfig
 from .modeling_base import (
     INCModel,
8 changes: 2 additions & 6 deletions optimum/intel/neural_compressor/modeling_base.py
@@ -44,11 +44,7 @@
 from optimum.intel.generation import BaseModelForCausalLM

 from ...modeling_base import OptimizedModel
-from ..utils.import_utils import (
-    _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_torch_version,
-)
+from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
 from .configuration import INCConfig
 from .utils import WEIGHTS_NAME

@@ -136,7 +132,7 @@ def _from_pretrained(
         model_save_dir = Path(model_cache_path).parent
         inc_config = None
         msg = None
-        if is_intel_extension_for_transformers_available():
+        if is_itrex_available():
             try:
                 quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
                 algorithm = getattr(quantization_config, "quant_method", None)
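
`_from_pretrained` only probes for an ITREX `quantize_config.json` when the package is installed. An illustrative sketch of that detection step under simplified assumptions — the helper name and the error handling are guesses, not the exact library code:

```python
from pathlib import Path

from transformers import PretrainedConfig


def detect_weight_only_algorithm(model_save_dir: Path):
    # Return the `quant_method` recorded next to the weights, or None
    # when no quantize_config.json exists (i.e. not an ITREX
    # weight-only checkpoint).
    try:
        quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
    except OSError:
        return None
    return getattr(quantization_config, "quant_method", None)
```
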
105 changes: 19 additions & 86 deletions optimum/intel/neural_compressor/quantization.py
@@ -19,11 +19,10 @@
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Optional, Union

 import torch
 from datasets import Dataset, load_dataset
-from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
 from neural_compressor.config import PostTrainingQuantConfig
 from neural_compressor.experimental.export import torch_to_int8_onnx
 from neural_compressor.model.onnx_model import ONNXModel
@@ -47,14 +46,14 @@

 from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
 from ..utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    _intel_extension_for_transformers_version,
+    ITREX_IMPORT_ERROR,
     _ipex_version,
+    _itrex_version,
     _neural_compressor_version,
     _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_intel_extension_for_transformers_version,
     is_ipex_version,
+    is_itrex_available,
+    is_itrex_version,
     is_neural_compressor_version,
     is_torch_version,
 )
@@ -69,16 +68,21 @@
     INCModelForTokenClassification,
     INCModelForVision2Seq,
 )
-from .utils import INCDataLoader, _cfgs_to_fx_cfgs
+from .utils import (
+    IPEX_MINIMUM_VERSION,
+    ITREX_MINIMUM_TORCH_VERSION,
+    ITREX_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION,
+    INCDataLoader,
+)


-INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0"
-
-if is_intel_extension_for_transformers_available():
-    if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION):
+if is_itrex_available():
+    if is_itrex_version("<", ITREX_MINIMUM_VERSION):
         raise ImportError(
-            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
-            f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
+            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, "
+            f"but only version {ITREX_MINIMUM_VERSION} or higher is supported."
         )
     from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
     from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
@@ -92,10 +96,6 @@

 logger = logging.getLogger(__name__)

-NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
-NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
-IPEX_MINIMUM_VERSION = "2.1.0"
-ITREX_MINIMUM_TORCH_VERSION = "2.2.0"

 if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
     raise ImportError(
@@ -231,8 +231,8 @@ def quantize(
                     f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
                     f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
                 )
-            if not is_intel_extension_for_transformers_available():
-                raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
+            if not is_itrex_available():
+                raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization"))

             if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
                 raise ImportError(
@@ -514,70 +514,3 @@ def _get_calibration_dataloader(
     def _remove_unused_columns(self, dataset: Dataset):
         ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
         return dataset.remove_columns(ignored_columns)
-
-
-# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96
-def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module:
-    """
-    Apply Intel Neural Compressor quantization steps on the given model.
-    Arguments:
-        q_config (`Dict`):
-            Dictionary containing all quantization information such as approach, dtype, scheme and granularity.
-        model (`torch.nn.Module`):
-            Model to quantize.
-    Returns:
-        q_model (`torch.nn.Module`):
-            Quantized model.
-    """
-    from torch.quantization import add_observer_, convert
-    from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx
-
-    approach = q_config.get("approach")
-    framework = q_config.get("framework")
-
-    if approach not in SUPPORTED_QUANT_MODE:
-        raise ValueError(
-            "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys())
-        )
-
-    quant_mode = INCQuantizationMode(approach)
-    q_model = copy.deepcopy(model)
-    q_model.eval()
-
-    if framework == "pytorch_fx":
-        op_cfgs = _cfg_to_qconfig(q_config, approach)
-        fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach)
-
-        if not q_config["fx_sub_module_list"]:
-            if quant_mode == INCQuantizationMode.AWARE_TRAINING:
-                q_model.train()
-                q_model = prepare_qat_fx(q_model, fx_op_cfgs)
-            else:
-                q_model = prepare_fx(q_model, fx_op_cfgs)
-            q_model = convert_fx(q_model)
-
-        else:
-            sub_module_list = q_config["fx_sub_module_list"]
-            if q_config["approach"] == "quant_aware_training":
-                q_model.train()
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True)
-            else:
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="")
-            PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="")
-
-    else:
-        if quant_mode == INCQuantizationMode.DYNAMIC:
-            q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config, approach)
-        else:
-            q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config)
-
-        _propagate_qconfig(q_model, op_cfgs, approach=approach)
-
-        if quant_mode != INCQuantizationMode.DYNAMIC:
-            add_observer_(q_model)
-        q_model = convert(q_model, mapping=q_mapping, inplace=True)
-
-    return q_model
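
The substantive change in this file is the gate itself: the old check rejected every release except exactly 1.4.0 (`!=`), while the new one only rejects releases older than the floor (`<`), so newer ITREX releases pass. A small self-contained sketch of the difference, using `packaging` comparisons:

```python
from packaging.version import parse

MINIMUM = parse("1.4.0")

for found in ("1.3.2", "1.4.0", "1.4.1"):
    version = parse(found)
    old_gate_rejects = version != MINIMUM  # old: only exactly 1.4.0 passes
    new_gate_rejects = version < MINIMUM   # new: 1.4.0 and anything newer passes
    print(f"{found}: old rejects={old_gate_rejects}, new rejects={new_gate_rejects}")

# 1.3.2: old rejects=True,  new rejects=True
# 1.4.0: old rejects=False, new rejects=False
# 1.4.1: old rejects=True,  new rejects=False
```
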
50 changes: 6 additions & 44 deletions optimum/intel/neural_compressor/utils.py
@@ -16,11 +16,9 @@
 import os
 import warnings
 from collections import UserDict
-from typing import Dict

 import torch
 from neural_compressor.utils.pytorch import load
-from packaging import version
 from torch.utils.data import DataLoader

 from ..utils.constant import WEIGHTS_NAME
@@ -31,6 +29,12 @@

 CONFIG_NAME = "best_configure.yaml"

+NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
+NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
+IPEX_MINIMUM_VERSION = "2.1.0"
+ITREX_MINIMUM_VERSION = "1.4.0"
+ITREX_MINIMUM_TORCH_VERSION = "2.2.0"
+

 _HEAD_TO_AUTOMODELS = {
     "fill-mask": "INCModelForMaskedLM",
@@ -45,10 +49,6 @@
 }


-parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
-is_torch_less_than_1_13 = parsed_torch_version_base < version.parse("1.13.0")
-
-
 class INCDataLoader(DataLoader):
     use_label = True

@@ -73,44 +73,6 @@ def __iter__(self):
             yield input


-def _cfgs_to_fx_cfgs(op_cfgs: Dict, observer_type: str = "post_training_static_quant") -> Dict:
-    """Inc function which convert a quantization config to a format that meets the requirements of torch.fx.
-    Arguments:
-        op_cfgs (`dict`):
-            Dictionary of quantization configure for each op.
-        observer_type (`str`):
-            Specify observer type.
-    Returns:
-        fx_op_cfgs (`dict`):
-            Dictionary of quantization configure that meets the requirements of torch.fx.
-    """
-    if not is_torch_less_than_1_13:
-        from torch.ao.quantization import QConfigMapping
-
-        fx_op_cfgs = QConfigMapping()
-    else:
-        fx_op_cfgs = {}
-    op_tuple_cfg_list = []
-    for key, value in op_cfgs.items():
-        if key == "default_qconfig":
-            if not is_torch_less_than_1_13:
-                fx_op_cfgs.set_global(value)
-            else:
-                fx_op_cfgs[""] = value
-            continue
-        if not is_torch_less_than_1_13:
-            fx_op_cfgs.set_module_name(key, value)
-        else:
-            op_tuple = (key, value)
-            op_tuple_cfg_list.append(op_tuple)
-
-    if is_torch_less_than_1_13:
-        fx_op_cfgs["module_name"] = op_tuple_cfg_list
-
-    return fx_op_cfgs
-
-
 def load_quantized_model(checkpoint_dir_or_file: str, model: torch.nn.Module, **kwargs) -> torch.nn.Module:
     """
     Returns the quantized model, which was quantized through neural_compressor.
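
All minimum-version floors now live in this one module, so `quantization.py` (and any future caller) imports them rather than redefining them. Assuming an environment with `optimum.intel` installed from this commit, the constants can be read directly:

```python
# The values below are the ones added in this commit's hunk above.
from optimum.intel.neural_compressor.utils import (
    IPEX_MINIMUM_VERSION,
    ITREX_MINIMUM_TORCH_VERSION,
    ITREX_MINIMUM_VERSION,
    NEURAL_COMPRESSOR_MINIMUM_VERSION,
)

print(ITREX_MINIMUM_VERSION)              # "1.4.0"
print(ITREX_MINIMUM_TORCH_VERSION)        # "2.2.0"
print(NEURAL_COMPRESSOR_MINIMUM_VERSION)  # "2.1.0"
print(IPEX_MINIMUM_VERSION)               # "2.1.0"
```
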
27 changes: 12 additions & 15 deletions optimum/intel/utils/import_utils.py
@@ -61,14 +61,14 @@
     _neural_compressor_available = False


-_intel_extension_for_transformers_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
-_intel_extension_for_transformers_version = "N/A"
-if _intel_extension_for_transformers_available:
+_itrex_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
+_itrex_version = "N/A"
+if _itrex_available:
     try:
-        _intel_extension_for_transformers_version = importlib_metadata.version("intel_extension_for_transformers")
+        _itrex_version = importlib_metadata.version("intel_extension_for_transformers")
         logging.warn("`transformers` version >= 4.31 is required by intel-extension-for-transformers.")
     except importlib_metadata.PackageNotFoundError:
-        _intel_extension_for_transformers_available = False
+        _itrex_available = False


 _ipex_available = importlib.util.find_spec("intel_extension_for_pytorch") is not None
@@ -158,8 +158,8 @@ def is_neural_compressor_available():
     return _neural_compressor_available


-def is_intel_extension_for_transformers_available():
-    return _intel_extension_for_transformers_available
+def is_itrex_available():
+    return _itrex_available


 def is_ipex_available():
@@ -314,13 +314,13 @@ def is_neural_compressor_version(operation: str, version: str):
     return compare_versions(parse(_neural_compressor_version), operation, version)


-def is_intel_extension_for_transformers_version(operation: str, version: str):
+def is_itrex_version(operation: str, version: str):
     """
     Compare the current intel_extension_for_transformers version to a given reference with an operation.
     """
-    if not _intel_extension_for_transformers_available:
+    if not _itrex_available:
         return False
-    return compare_versions(parse(_intel_extension_for_transformers_version), operation, version)
+    return compare_versions(parse(_itrex_version), operation, version)


 def is_openvino_version(operation: str, version: str):
@@ -396,7 +396,7 @@ def is_timm_version(operation: str, version: str):
 `pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
 """

-INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR = """
+ITREX_IMPORT_ERROR = """
 {0} requires the intel-extension-for-transformers library but it was not found in your environment. You can install it with pip:
 `pip install intel-extension-for-transformers` and `pip install peft`. Please note that you may need to restart your runtime after installation.
 """
@@ -418,10 +418,7 @@
     ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
     ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
     ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
-    (
-        "intel_extension_for_transformers",
-        (is_intel_extension_for_transformers_available, INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR),
-    ),
+    ("itrex", (is_itrex_available, ITREX_IMPORT_ERROR)),
     ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
 ]
 )
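
A usage sketch for the renamed helpers; per the hunk above, `is_itrex_version()` returns `False` whenever ITREX is absent, so the version gate is safe to call unconditionally:

```python
from optimum.intel.utils.import_utils import is_itrex_available, is_itrex_version

# Mirrors the gate in quantization.py: reject only versions below the floor.
if is_itrex_available() and is_itrex_version("<", "1.4.0"):
    raise ImportError("intel-extension-for-transformers >= 1.4.0 is required")
```
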
4 changes: 2 additions & 2 deletions tests/neural_compressor/test_optimization.py
@@ -45,7 +45,7 @@
     set_seed,
 )
 from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
-from optimum.intel.utils.import_utils import is_torch_version, is_intel_extension_for_transformers_available
+from optimum.intel.utils.import_utils import is_torch_version, is_itrex_available


 from optimum.intel import (
@@ -511,7 +511,7 @@ class WeightOnlyQuantizationTest(INCTestMixin):
     )

     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    @unittest.skipIf(not is_intel_extension_for_transformers_available(), reason="ITREX not available")
+    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
     def test_weight_only_quantization(self, methodology, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
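
The same skip pattern in isolation, with a placeholder test body — this class is illustrative, not taken from the test suite:

```python
import unittest

from optimum.intel.utils.import_utils import is_itrex_available


class ExampleWeightOnlyTest(unittest.TestCase):
    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
    def test_itrex_dependent_path(self):
        # Runs only when intel-extension-for-transformers is importable.
        self.assertTrue(is_itrex_available())


if __name__ == "__main__":
    unittest.main()
```
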
