
Commit

Merge branch 'main' into deprecate-use_auth_token
IlyasMoutawwakil authored Apr 23, 2024
2 parents 8fca1d6 + a0dc06c commit 3afa9d0
Showing 21 changed files with 124 additions and 184 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_inc.yml
@@ -33,7 +33,7 @@ jobs:
pip install cmake
pip install py-cpuinfo
pip install .[neural-compressor,diffusers,tests]
pip install intel-extension-for-transformers==1.4.0
pip install intel-extension-for-transformers
pip install peft
- name: Test with Pytest
40 changes: 40 additions & 0 deletions .github/workflows/test_offline.yaml
@@ -0,0 +1,40 @@
name: Offline usage / Python - Test

on:
push:
branches: [main]
pull_request:
branches: [main]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
build:
strategy:
fail-fast: false
matrix:
python-version: [3.9]
os: [ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install .[tests,openvino]
- name: Test
run: |
HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
huggingface-cli download hf-internal-testing/tiny-random-gpt2
HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
HF_HUB_OFFLINE=1 pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
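
The new workflow warms the local Hub cache while online (huggingface-cli download), then repeats the export and the hub-loading tests with HF_HUB_OFFLINE=1 to prove that no network access is required. A rough in-process equivalent of that check, given here only as an illustrative sketch (it is not part of this commit and assumes huggingface_hub and transformers are installed):

    # Illustrative sketch: cache first, then load with network access disallowed.
    from huggingface_hub import snapshot_download
    from transformers import AutoModelForCausalLM

    repo_id = "hf-internal-testing/tiny-random-gpt2"

    # 1. Warm the local cache while the network is still reachable.
    snapshot_download(repo_id)

    # 2. Load strictly from the cache; this raises instead of contacting the Hub
    #    if any required file is missing locally.
    model = AutoModelForCausalLM.from_pretrained(repo_id, local_files_only=True)
    print(model.config.model_type)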
11 changes: 4 additions & 7 deletions examples/neural_compressor/language-modeling/run_clm.py
@@ -57,13 +57,10 @@
from transformers.utils.versions import require_version

from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
from optimum.intel.utils.import_utils import (
INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
is_intel_extension_for_transformers_available,
)
from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


if is_intel_extension_for_transformers_available():
if is_itrex_available():
from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig

os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
else:
recipes = {}
if optim_args.quantization_approach == "weight_only":
if not is_intel_extension_for_transformers_available():
raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
if not is_itrex_available():
raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
if optim_args.apply_pruning or optim_args.apply_distillation:
raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")

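Across the diff, is_intel_extension_for_transformers_available and INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR are renamed to the shorter is_itrex_available and ITREX_IMPORT_ERROR. A minimal sketch of how such an availability check is typically written, assuming an importlib-based lookup (illustrative only; the real helpers live in optimum.intel.utils.import_utils and may differ):

    import importlib.util

    # Illustrative message; the real ITREX_IMPORT_ERROR constant may be worded differently.
    ITREX_IMPORT_ERROR = (
        "{0} requires the intel-extension-for-transformers package, which was not found in "
        "your environment. You can install it with: pip install intel-extension-for-transformers"
    )


    def is_itrex_available() -> bool:
        # find_spec returns None when the package cannot be imported.
        return importlib.util.find_spec("intel_extension_for_transformers") is not None


    if not is_itrex_available():
        raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
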
6 changes: 5 additions & 1 deletion optimum/commands/export/openvino.py
@@ -18,6 +18,8 @@
from pathlib import Path
from typing import TYPE_CHECKING, Optional

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

from ...exporters import TasksManager
from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
from ..base import BaseOptimumCLICommand, CommandInfo
@@ -47,7 +49,9 @@ def parse_args_openvino(parser: "ArgumentParser"):
f" {str(TasksManager.get_all_tasks())}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
),
)
optional_group.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.")
optional_group.add_argument(
"--cache_dir", type=str, default=HUGGINGFACE_HUB_CACHE, help="Path indicating where to store cache."
)
optional_group.add_argument(
"--framework",
type=str,
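This commit also changes the default of cache_dir in several signatures from None to the Hub's own cache constant, so the CLI, the exporters and the modeling classes all resolve to the same directory. A minimal sketch of the pattern (resolve_cache_dir is an illustrative name, not an optimum-intel API):

    from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE


    def resolve_cache_dir(cache_dir: str = HUGGINGFACE_HUB_CACHE) -> str:
        # Defaulting to the shared Hub cache instead of None avoids each caller
        # re-deriving the location and keeps every entry point consistent.
        return cache_dir


    print(resolve_cache_dir())  # typically ~/.cache/huggingface/hub
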
3 changes: 2 additions & 1 deletion optimum/exporters/openvino/__main__.py
@@ -16,6 +16,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase

@@ -48,7 +49,7 @@ def main_export(
task: str = "auto",
device: str = "cpu",
framework: Optional[str] = None,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
trust_remote_code: bool = False,
pad_token_id: Optional[int] = None,
subfolder: str = "",
5 changes: 3 additions & 2 deletions optimum/intel/generation/modeling.py
@@ -21,6 +21,7 @@

import torch
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from transformers import AutoConfig, AutoModelForCausalLM, GenerationConfig, PretrainedConfig, PreTrainedModel
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -357,7 +358,7 @@ def _from_pretrained(
token: Optional[Union[bool, str]] = None,
revision: Optional[Union[str, None]] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
file_name: Optional[str] = WEIGHTS_NAME,
local_files_only: bool = False,
use_cache: bool = True,
@@ -403,7 +404,7 @@ def _from_transformers(
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
subfolder: str = "",
local_files_only: bool = False,
use_cache: bool = True,
5 changes: 3 additions & 2 deletions optimum/intel/ipex/modeling_base.py
@@ -22,6 +22,7 @@
import intel_extension_for_pytorch as ipex
import torch
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from intel_extension_for_pytorch.cpu._auto_kernel_selection import _enable_tpp
from intel_extension_for_pytorch.transformers.optimize import get_dummy_input
from transformers import (
@@ -154,7 +155,7 @@ def _from_transformers(
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
subfolder: str = "",
local_files_only: bool = False,
torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
@@ -193,7 +194,7 @@ def _from_pretrained(
token: Optional[Union[bool, str]] = None,
revision: Optional[Union[str, None]] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
file_name: Optional[str] = WEIGHTS_NAME,
local_files_only: bool = False,
subfolder: str = "",
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ..utils.import_utils import is_diffusers_available, is_intel_extension_for_transformers_available
from ..utils.import_utils import is_diffusers_available
from .configuration import INCConfig
from .modeling_base import (
INCModel,
11 changes: 4 additions & 7 deletions optimum/intel/neural_compressor/modeling_base.py
@@ -20,6 +20,7 @@

import torch
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from neural_compressor.utils.pytorch import load
from transformers import (
AutoConfig,
@@ -43,11 +44,7 @@
from optimum.intel.generation import BaseModelForCausalLM

from ...modeling_base import OptimizedModel
from ..utils.import_utils import (
_torch_version,
is_intel_extension_for_transformers_available,
is_torch_version,
)
from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
from .configuration import INCConfig
from .utils import WEIGHTS_NAME

@@ -105,7 +102,7 @@ def _from_pretrained(
token: Optional[Union[bool, str]] = None,
revision: Optional[Union[str, None]] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
file_name: str = WEIGHTS_NAME,
local_files_only: bool = False,
subfolder: str = "",
@@ -137,7 +134,7 @@ def _from_pretrained(
model_save_dir = Path(model_cache_path).parent
inc_config = None
msg = None
if is_intel_extension_for_transformers_available():
if is_itrex_available():
try:
quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
algorithm = getattr(quantization_config, "quant_method", None)
105 changes: 19 additions & 86 deletions optimum/intel/neural_compressor/quantization.py
@@ -19,11 +19,10 @@
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Callable, Dict, Optional, Union
from typing import Callable, Optional, Union

import torch
from datasets import Dataset, load_dataset
from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.experimental.export import torch_to_int8_onnx
from neural_compressor.model.onnx_model import ONNXModel
@@ -47,14 +46,14 @@

from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
from ..utils.import_utils import (
INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
_intel_extension_for_transformers_version,
ITREX_IMPORT_ERROR,
_ipex_version,
_itrex_version,
_neural_compressor_version,
_torch_version,
is_intel_extension_for_transformers_available,
is_intel_extension_for_transformers_version,
is_ipex_version,
is_itrex_available,
is_itrex_version,
is_neural_compressor_version,
is_torch_version,
)
@@ -69,16 +68,21 @@
INCModelForTokenClassification,
INCModelForVision2Seq,
)
from .utils import INCDataLoader, _cfgs_to_fx_cfgs

from .utils import (
IPEX_MINIMUM_VERSION,
ITREX_MINIMUM_TORCH_VERSION,
ITREX_MINIMUM_VERSION,
NEURAL_COMPRESSOR_MINIMUM_VERSION,
NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION,
INCDataLoader,
)

INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0"

if is_intel_extension_for_transformers_available():
if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION):
if is_itrex_available():
if is_itrex_version("<", ITREX_MINIMUM_VERSION):
raise ImportError(
f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, "
f"but only version {ITREX_MINIMUM_VERSION} or higher is supported."
)
from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
@@ -92,10 +96,6 @@

logger = logging.getLogger(__name__)

NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
IPEX_MINIMUM_VERSION = "2.1.0"
ITREX_MINIMUM_TORCH_VERSION = "2.2.0"

if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
raise ImportError(
@@ -231,8 +231,8 @@ def quantize(
f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
)
if not is_intel_extension_for_transformers_available():
raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
if not is_itrex_available():
raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization"))

if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
raise ImportError(
@@ -516,70 +516,3 @@ def _get_calibration_dataloader(
def _remove_unused_columns(self, dataset: Dataset):
ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
return dataset.remove_columns(ignored_columns)


# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96
def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module:
"""
Apply Intel Neural Compressor quantization steps on the given model.
Arguments:
q_config (`Dict`):
Dictionary containing all quantization information such as approach, dtype, scheme and granularity.
model (`torch.nn.Module`):
Model to quantize.
Returns:
q_model (`torch.nn.Module`):
Quantized model.
"""
from torch.quantization import add_observer_, convert
from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx

approach = q_config.get("approach")
framework = q_config.get("framework")

if approach not in SUPPORTED_QUANT_MODE:
raise ValueError(
"Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys())
)

quant_mode = INCQuantizationMode(approach)
q_model = copy.deepcopy(model)
q_model.eval()

if framework == "pytorch_fx":
op_cfgs = _cfg_to_qconfig(q_config, approach)
fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach)

if not q_config["fx_sub_module_list"]:
if quant_mode == INCQuantizationMode.AWARE_TRAINING:
q_model.train()
q_model = prepare_qat_fx(q_model, fx_op_cfgs)
else:
q_model = prepare_fx(q_model, fx_op_cfgs)
q_model = convert_fx(q_model)

else:
sub_module_list = q_config["fx_sub_module_list"]
if q_config["approach"] == "quant_aware_training":
q_model.train()
PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True)
else:
PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="")
PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="")

else:
if quant_mode == INCQuantizationMode.DYNAMIC:
q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings()
op_cfgs = _cfg_to_qconfig(q_config, approach)
else:
q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings()
op_cfgs = _cfg_to_qconfig(q_config)

_propagate_qconfig(q_model, op_cfgs, approach=approach)

if quant_mode != INCQuantizationMode.DYNAMIC:
add_observer_(q_model)
q_model = convert(q_model, mapping=q_mapping, inplace=True)

return q_model
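
quantization.py now pulls its minimum-version constants from .utils and gates ITREX with is_itrex_version("<", ITREX_MINIMUM_VERSION) instead of requiring an exact pin. A rough standalone equivalent of that gate, offered only as a sketch (the 1.4.0 floor is assumed from the constant this commit removes, and the real helpers live in optimum.intel.utils.import_utils):

    import importlib.metadata

    from packaging import version

    # Assumed floor, taken from the INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION
    # constant that this commit removes.
    ITREX_MINIMUM_VERSION = "1.4.0"

    try:
        _itrex_version = importlib.metadata.version("intel-extension-for-transformers")
    except importlib.metadata.PackageNotFoundError:
        _itrex_version = None

    if _itrex_version is not None and version.parse(_itrex_version) < version.parse(ITREX_MINIMUM_VERSION):
        raise ImportError(
            f"Found an incompatible version of `intel-extension-for-transformers`. Found version "
            f"{_itrex_version}, but only version {ITREX_MINIMUM_VERSION} or higher is supported."
        )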