Update minimum itrex version (#675)
* Update minimum itrex version

* Trigger test

* trigger test

* trigger test

* fix

* remove tests itrex version constraint
echarlaix authored Apr 23, 2024
1 parent 00581ab commit a0dc06c
Showing 8 changed files with 47 additions and 162 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_inc.yml
@@ -33,7 +33,7 @@ jobs:
          pip install cmake
          pip install py-cpuinfo
          pip install .[neural-compressor,diffusers,tests]
-         pip install intel-extension-for-transformers==1.4.0
+         pip install intel-extension-for-transformers
          pip install peft
      - name: Test with Pytest
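
The CI job no longer pins `intel-extension-for-transformers` (ITREX) to 1.4.0; the floor is instead enforced at import time inside the library (see the `quantization.py` and `utils.py` changes below). A minimal sketch of that runtime guard, assuming only `importlib` and `packaging` — variable names are illustrative, not the exact library code:

```python
# Sketch: enforce a minimum version at import time instead of pinning in CI.
import importlib.util
import importlib.metadata as importlib_metadata

from packaging.version import parse

ITREX_MINIMUM_VERSION = "1.4.0"

_itrex_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
_itrex_version = "N/A"
if _itrex_available:
    try:
        _itrex_version = importlib_metadata.version("intel_extension_for_transformers")
    except importlib_metadata.PackageNotFoundError:
        _itrex_available = False

if _itrex_available and parse(_itrex_version) < parse(ITREX_MINIMUM_VERSION):
    raise ImportError(
        f"intel-extension-for-transformers {_itrex_version} found, "
        f"but {ITREX_MINIMUM_VERSION} or higher is required."
    )
```
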
11 changes: 4 additions & 7 deletions examples/neural_compressor/language-modeling/run_clm.py
@@ -57,13 +57,10 @@
 from transformers.utils.versions import require_version

 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
-from optimum.intel.utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    is_intel_extension_for_transformers_available,
-)
+from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


-if is_intel_extension_for_transformers_available():
+if is_itrex_available():
     from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig

 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
     else:
         recipes = {}
     if optim_args.quantization_approach == "weight_only":
-        if not is_intel_extension_for_transformers_available():
-            raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
+        if not is_itrex_available():
+            raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
         if optim_args.apply_pruning or optim_args.apply_distillation:
             raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
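
The example script gates weight-only quantization behind `is_itrex_available()` and raises `ITREX_IMPORT_ERROR` formatted with the feature name. A hedged sketch of the same gate factored into a helper — `require_itrex` is hypothetical, not part of the codebase:

```python
from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


def require_itrex(feature: str) -> None:
    # Raise a descriptive ImportError naming the feature that needs ITREX.
    if not is_itrex_available():
        raise ImportError(ITREX_IMPORT_ERROR.format(feature))


require_itrex("WeightOnly quantization")  # no-op when ITREX is installed
```
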
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from ..utils.import_utils import is_diffusers_available, is_intel_extension_for_transformers_available
+from ..utils.import_utils import is_diffusers_available
 from .configuration import INCConfig
 from .modeling_base import (
     INCModel,
8 changes: 2 additions & 6 deletions optimum/intel/neural_compressor/modeling_base.py
@@ -44,11 +44,7 @@
 from optimum.intel.generation import BaseModelForCausalLM

 from ...modeling_base import OptimizedModel
-from ..utils.import_utils import (
-    _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_torch_version,
-)
+from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
 from .configuration import INCConfig
 from .utils import WEIGHTS_NAME

@@ -136,7 +132,7 @@ def _from_pretrained(
         model_save_dir = Path(model_cache_path).parent
         inc_config = None
         msg = None
-        if is_intel_extension_for_transformers_available():
+        if is_itrex_available():
             try:
                 quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
                 algorithm = getattr(quantization_config, "quant_method", None)
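
`_from_pretrained` only probes for an ITREX `quantize_config.json` when the package is installed. An illustrative sketch of that detection step under simplified assumptions — the helper name and the error handling are guesses, not the exact library code:

```python
from pathlib import Path

from transformers import PretrainedConfig


def detect_weight_only_algorithm(model_save_dir: Path):
    # Return the `quant_method` recorded next to the weights, or None
    # when no quantize_config.json exists (i.e. not an ITREX
    # weight-only checkpoint).
    try:
        quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
    except OSError:
        return None
    return getattr(quantization_config, "quant_method", None)
```
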
105 changes: 19 additions & 86 deletions optimum/intel/neural_compressor/quantization.py
@@ -19,11 +19,10 @@
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Optional, Union

 import torch
 from datasets import Dataset, load_dataset
-from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
 from neural_compressor.config import PostTrainingQuantConfig
 from neural_compressor.experimental.export import torch_to_int8_onnx
 from neural_compressor.model.onnx_model import ONNXModel
@@ -47,14 +46,14 @@

 from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
 from ..utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    _intel_extension_for_transformers_version,
+    ITREX_IMPORT_ERROR,
     _ipex_version,
+    _itrex_version,
     _neural_compressor_version,
     _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_intel_extension_for_transformers_version,
     is_ipex_version,
+    is_itrex_available,
+    is_itrex_version,
     is_neural_compressor_version,
     is_torch_version,
 )
@@ -69,16 +68,21 @@
     INCModelForTokenClassification,
     INCModelForVision2Seq,
 )
-from .utils import INCDataLoader, _cfgs_to_fx_cfgs
+from .utils import (
+    IPEX_MINIMUM_VERSION,
+    ITREX_MINIMUM_TORCH_VERSION,
+    ITREX_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION,
+    INCDataLoader,
+)


-INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0"
-
-if is_intel_extension_for_transformers_available():
-    if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION):
+if is_itrex_available():
+    if is_itrex_version("<", ITREX_MINIMUM_VERSION):
         raise ImportError(
-            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
-            f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
+            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, "
+            f"but only version {ITREX_MINIMUM_VERSION} or higher is supported."
         )
     from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
     from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
@@ -92,10 +96,6 @@

 logger = logging.getLogger(__name__)

-NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
-NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
-IPEX_MINIMUM_VERSION = "2.1.0"
-ITREX_MINIMUM_TORCH_VERSION = "2.2.0"

 if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
     raise ImportError(
@@ -231,8 +231,8 @@ def quantize(
                     f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
                     f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
                 )
-            if not is_intel_extension_for_transformers_available():
-                raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
+            if not is_itrex_available():
+                raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization"))

             if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
                 raise ImportError(
@@ -514,70 +514,3 @@ def _get_calibration_dataloader(
     def _remove_unused_columns(self, dataset: Dataset):
         ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
         return dataset.remove_columns(ignored_columns)
-
-
-# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96
-def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module:
-    """
-    Apply Intel Neural Compressor quantization steps on the given model.
-    Arguments:
-        q_config (`Dict`):
-            Dictionary containing all quantization information such as approach, dtype, scheme and granularity.
-        model (`torch.nn.Module`):
-            Model to quantize.
-    Returns:
-        q_model (`torch.nn.Module`):
-            Quantized model.
-    """
-    from torch.quantization import add_observer_, convert
-    from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx
-
-    approach = q_config.get("approach")
-    framework = q_config.get("framework")
-
-    if approach not in SUPPORTED_QUANT_MODE:
-        raise ValueError(
-            "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys())
-        )
-
-    quant_mode = INCQuantizationMode(approach)
-    q_model = copy.deepcopy(model)
-    q_model.eval()
-
-    if framework == "pytorch_fx":
-        op_cfgs = _cfg_to_qconfig(q_config, approach)
-        fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach)
-
-        if not q_config["fx_sub_module_list"]:
-            if quant_mode == INCQuantizationMode.AWARE_TRAINING:
-                q_model.train()
-                q_model = prepare_qat_fx(q_model, fx_op_cfgs)
-            else:
-                q_model = prepare_fx(q_model, fx_op_cfgs)
-            q_model = convert_fx(q_model)
-
-        else:
-            sub_module_list = q_config["fx_sub_module_list"]
-            if q_config["approach"] == "quant_aware_training":
-                q_model.train()
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True)
-            else:
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="")
-            PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="")
-
-    else:
-        if quant_mode == INCQuantizationMode.DYNAMIC:
-            q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config, approach)
-        else:
-            q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config)
-
-        _propagate_qconfig(q_model, op_cfgs, approach=approach)
-
-        if quant_mode != INCQuantizationMode.DYNAMIC:
-            add_observer_(q_model)
-        q_model = convert(q_model, mapping=q_mapping, inplace=True)
-
-    return q_model
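
The substantive change in this file is the gate itself: the old check rejected every release except exactly 1.4.0 (`!=`), while the new one only rejects releases older than the floor (`<`), so newer ITREX releases pass. A small self-contained sketch of the difference, using `packaging` comparisons:

```python
from packaging.version import parse

MINIMUM = parse("1.4.0")

for found in ("1.3.2", "1.4.0", "1.4.1"):
    version = parse(found)
    old_gate_rejects = version != MINIMUM  # old: only exactly 1.4.0 passes
    new_gate_rejects = version < MINIMUM   # new: 1.4.0 and anything newer passes
    print(f"{found}: old rejects={old_gate_rejects}, new rejects={new_gate_rejects}")

# 1.3.2: old rejects=True,  new rejects=True
# 1.4.0: old rejects=False, new rejects=False
# 1.4.1: old rejects=True,  new rejects=False
```
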
50 changes: 6 additions & 44 deletions optimum/intel/neural_compressor/utils.py
@@ -16,11 +16,9 @@
 import os
 import warnings
 from collections import UserDict
-from typing import Dict

 import torch
 from neural_compressor.utils.pytorch import load
-from packaging import version
 from torch.utils.data import DataLoader

 from ..utils.constant import WEIGHTS_NAME
@@ -31,6 +29,12 @@

 CONFIG_NAME = "best_configure.yaml"

+NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
+NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
+IPEX_MINIMUM_VERSION = "2.1.0"
+ITREX_MINIMUM_VERSION = "1.4.0"
+ITREX_MINIMUM_TORCH_VERSION = "2.2.0"
+

 _HEAD_TO_AUTOMODELS = {
     "fill-mask": "INCModelForMaskedLM",
@@ -45,10 +49,6 @@
 }


-parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
-is_torch_less_than_1_13 = parsed_torch_version_base < version.parse("1.13.0")
-
-
 class INCDataLoader(DataLoader):
     use_label = True

@@ -73,44 +73,6 @@ def __iter__(self):
             yield input


-def _cfgs_to_fx_cfgs(op_cfgs: Dict, observer_type: str = "post_training_static_quant") -> Dict:
-    """Inc function which convert a quantization config to a format that meets the requirements of torch.fx.
-    Arguments:
-        op_cfgs (`dict`):
-            Dictionary of quantization configure for each op.
-        observer_type (`str`):
-            Specify observer type.
-    Returns:
-        fx_op_cfgs (`dict`):
-            Dictionary of quantization configure that meets the requirements of torch.fx.
-    """
-    if not is_torch_less_than_1_13:
-        from torch.ao.quantization import QConfigMapping
-
-        fx_op_cfgs = QConfigMapping()
-    else:
-        fx_op_cfgs = {}
-    op_tuple_cfg_list = []
-    for key, value in op_cfgs.items():
-        if key == "default_qconfig":
-            if not is_torch_less_than_1_13:
-                fx_op_cfgs.set_global(value)
-            else:
-                fx_op_cfgs[""] = value
-            continue
-        if not is_torch_less_than_1_13:
-            fx_op_cfgs.set_module_name(key, value)
-        else:
-            op_tuple = (key, value)
-            op_tuple_cfg_list.append(op_tuple)
-
-    if is_torch_less_than_1_13:
-        fx_op_cfgs["module_name"] = op_tuple_cfg_list
-
-    return fx_op_cfgs
-
-
 def load_quantized_model(checkpoint_dir_or_file: str, model: torch.nn.Module, **kwargs) -> torch.nn.Module:
     """
     Returns the quantized model, which was quantized through neural_compressor.
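
All minimum-version floors now live in this one module, so `quantization.py` (and any future caller) imports them rather than redefining them. Assuming an environment with `optimum.intel` installed from this commit, the constants can be read directly:

```python
# The values below are the ones added in this commit's hunk above.
from optimum.intel.neural_compressor.utils import (
    IPEX_MINIMUM_VERSION,
    ITREX_MINIMUM_TORCH_VERSION,
    ITREX_MINIMUM_VERSION,
    NEURAL_COMPRESSOR_MINIMUM_VERSION,
)

print(ITREX_MINIMUM_VERSION)              # "1.4.0"
print(ITREX_MINIMUM_TORCH_VERSION)        # "2.2.0"
print(NEURAL_COMPRESSOR_MINIMUM_VERSION)  # "2.1.0"
print(IPEX_MINIMUM_VERSION)               # "2.1.0"
```
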
27 changes: 12 additions & 15 deletions optimum/intel/utils/import_utils.py
@@ -61,14 +61,14 @@
     _neural_compressor_available = False


-_intel_extension_for_transformers_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
-_intel_extension_for_transformers_version = "N/A"
-if _intel_extension_for_transformers_available:
+_itrex_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
+_itrex_version = "N/A"
+if _itrex_available:
     try:
-        _intel_extension_for_transformers_version = importlib_metadata.version("intel_extension_for_transformers")
+        _itrex_version = importlib_metadata.version("intel_extension_for_transformers")
         logging.warn("`transformers` version >= 4.31 is required by intel-extension-for-transformers.")
     except importlib_metadata.PackageNotFoundError:
-        _intel_extension_for_transformers_available = False
+        _itrex_available = False


 _ipex_available = importlib.util.find_spec("intel_extension_for_pytorch") is not None
@@ -158,8 +158,8 @@ def is_neural_compressor_available():
     return _neural_compressor_available


-def is_intel_extension_for_transformers_available():
-    return _intel_extension_for_transformers_available
+def is_itrex_available():
+    return _itrex_available


 def is_ipex_available():
@@ -314,13 +314,13 @@ def is_neural_compressor_version(operation: str, version: str):
     return compare_versions(parse(_neural_compressor_version), operation, version)


-def is_intel_extension_for_transformers_version(operation: str, version: str):
+def is_itrex_version(operation: str, version: str):
     """
     Compare the current intel_extension_for_transformers version to a given reference with an operation.
     """
-    if not _intel_extension_for_transformers_available:
+    if not _itrex_available:
         return False
-    return compare_versions(parse(_intel_extension_for_transformers_version), operation, version)
+    return compare_versions(parse(_itrex_version), operation, version)


 def is_openvino_version(operation: str, version: str):
@@ -396,7 +396,7 @@ def is_timm_version(operation: str, version: str):
 `pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
 """

-INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR = """
+ITREX_IMPORT_ERROR = """
 {0} requires the intel-extension-for-transformers library but it was not found in your environment. You can install it with pip:
 `pip install intel-extension-for-transformers` and `pip install peft`. Please note that you may need to restart your runtime after installation.
 """
@@ -418,10 +418,7 @@
     ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
     ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
     ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
-    (
-        "intel_extension_for_transformers",
-        (is_intel_extension_for_transformers_available, INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR),
-    ),
+    ("itrex", (is_itrex_available, ITREX_IMPORT_ERROR)),
     ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
 ]
 )
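
A usage sketch for the renamed helpers; per the hunk above, `is_itrex_version()` returns `False` whenever ITREX is absent, so the version gate is safe to call unconditionally:

```python
from optimum.intel.utils.import_utils import is_itrex_available, is_itrex_version

# Mirrors the gate in quantization.py: reject only versions below the floor.
if is_itrex_available() and is_itrex_version("<", "1.4.0"):
    raise ImportError("intel-extension-for-transformers >= 1.4.0 is required")
```
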
4 changes: 2 additions & 2 deletions tests/neural_compressor/test_optimization.py
@@ -45,7 +45,7 @@
     set_seed,
 )
 from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
-from optimum.intel.utils.import_utils import is_torch_version, is_intel_extension_for_transformers_available
+from optimum.intel.utils.import_utils import is_torch_version, is_itrex_available


 from optimum.intel import (
@@ -511,7 +511,7 @@ class WeightOnlyQuantizationTest(INCTestMixin):
     )

     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    @unittest.skipIf(not is_intel_extension_for_transformers_available(), reason="ITREX not available")
+    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
     def test_weight_only_quantization(self, methodology, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
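
The same skip pattern in isolation, with a placeholder test body — this class is illustrative, not taken from the test suite:

```python
import unittest

from optimum.intel.utils.import_utils import is_itrex_available


class ExampleWeightOnlyTest(unittest.TestCase):
    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
    def test_itrex_dependent_path(self):
        # Runs only when intel-extension-for-transformers is importable.
        self.assertTrue(is_itrex_available())


if __name__ == "__main__":
    unittest.main()
```
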
