diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index bf9460c75a..6d709eecfd 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -32,7 +32,7 @@ jobs: python -m pip install --upgrade pip # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - pip install .[openvino,openvino-tokenizers,nncf,tests,diffusers] + pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime - name: Test with Pytest run: | pytest tests/openvino/ --ignore test_modeling_basic diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index 93a4417bfc..59059d688d 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -18,6 +18,7 @@ from transformers.utils import OptionalDependencyNotAvailable, _LazyModule from .utils import ( + is_accelerate_available, is_diffusers_available, is_ipex_available, is_neural_compressor_available, @@ -29,6 +30,7 @@ _import_structure = { "openvino": [], + "utils.dummy_openvino_and_nncf_objects": [], } try: @@ -57,13 +59,19 @@ if not (is_openvino_available() and is_nncf_available()): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: - _import_structure["utils.dummy_openvino_and_nncf_objects"] = [ - "OVQuantizer", - "OVTrainer", - "OVTrainingArguments", - ] + _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVQuantizer", "OVTrainingArguments"]) +else: + _import_structure["openvino"].extend(["OVQuantizer", "OVTrainingArguments"]) + + +try: + if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVTrainer"]) else: - _import_structure["openvino"].extend(["OVQuantizer", "OVTrainer", "OVTrainingArguments"]) + _import_structure["openvino"].extend(["OVTrainer"]) + try: if not (is_openvino_available() and is_diffusers_available()): @@ -145,6 +153,7 @@ "INCSeq2SeqTrainer", "INCTrainer", ] + try: if not (is_neural_compressor_available() and is_diffusers_available()): raise OptionalDependencyNotAvailable() @@ -177,13 +186,17 @@ if not (is_openvino_available() and is_nncf_available()): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: - from .utils.dummy_openvino_and_nncf_objects import ( - OVQuantizer, - OVTrainer, - OVTrainingArguments, - ) + from .utils.dummy_openvino_and_nncf_objects import OVQuantizer, OVTrainingArguments + else: + from .openvino import OVQuantizer, OVTrainingArguments + + try: + if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from .utils.dummy_openvino_and_nncf_objects import OVTrainer else: - from .openvino import OVQuantizer, OVTrainer, OVTrainingArguments + from .openvino import OVTrainer try: if not (is_openvino_available() and is_diffusers_available()): diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index a6227615a2..1df932771a 100644 --- a/optimum/intel/openvino/__init__.py +++ b/optimum/intel/openvino/__init__.py @@ -14,7 +14,7 @@ import logging -from ..utils.import_utils import is_diffusers_available, is_nncf_available +from ..utils.import_utils import is_accelerate_available, is_diffusers_available, is_nncf_available from .utils import ( OV_DECODER_NAME, OV_DECODER_WITH_PAST_NAME, @@ -37,9 +37,11 @@ patch_torch_operators() from .quantization import OVQuantizer - from .trainer import OVTrainer from .training_args import OVTrainingArguments + if is_accelerate_available(): + from .trainer import OVTrainer + from .configuration import OVConfig, OVWeightQuantizationConfig from .modeling import ( diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index d7b88f2be3..cd26f91f22 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -17,7 +17,7 @@ import logging import os from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union +from typing import Any, Callable, Dict, Optional, Tuple, Union import nncf import openvino @@ -56,8 +56,7 @@ if is_datasets_available(): - if TYPE_CHECKING: - from datasets import Dataset + from datasets import Dataset register_module(ignored_algorithms=[])(Conv1D) @@ -147,6 +146,7 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No ) self.task = task or feature self.seed = seed + # TODO : deprecate input_names self.input_names = None signature = inspect.signature(self.model.forward) self._signature_columns = list(signature.parameters.keys()) @@ -526,9 +526,15 @@ def _get_calibration_dataloader( data_collator: Optional[DataCollator] = None, ) -> OVDataLoader: data_collator = data_collator if data_collator is not None else default_data_collator + + if not is_datasets_available() or not isinstance(calibration_dataset, Dataset): + logger.warning( + "`remove_unused_columns` set to `False` as calibration_dataset is not an instance of `datasets.Dataset`" + ) + remove_unused_columns = False + if remove_unused_columns: calibration_dataset = self._remove_unused_columns(calibration_dataset) - self.input_names = calibration_dataset.column_names generator = torch.Generator() generator.manual_seed(self.seed) sampler = RandomSampler(calibration_dataset, generator=generator) diff --git a/optimum/intel/utils/__init__.py b/optimum/intel/utils/__init__.py index 4e7522ee77..d77588f896 100644 --- a/optimum/intel/utils/__init__.py +++ b/optimum/intel/utils/__init__.py @@ -16,6 +16,7 @@ _neural_compressor_version, _torch_version, compare_versions, + is_accelerate_available, is_diffusers_available, is_ipex_available, is_neural_compressor_available, diff --git a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py index 45c390aff2..8ae3135667 100644 --- a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py +++ b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py @@ -27,14 +27,14 @@ def from_pretrained(cls, *args, **kwargs): class OVTrainer(metaclass=DummyObject): - _backends = ["openvino", "nncf"] + _backends = ["openvino", "nncf", "accelerate"] def __init__(self, *args, **kwargs): - requires_backends(self, ["openvino", "nncf"]) + requires_backends(self, ["openvino", "nncf", "accelerate"]) @classmethod def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["openvino", "nncf"]) + requires_backends(cls, ["openvino", "nncf", "accelerate"]) class OVQuantizer(metaclass=DummyObject): diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py index f1fb486c4f..1d5ce25086 100644 --- a/optimum/intel/utils/import_utils.py +++ b/optimum/intel/utils/import_utils.py @@ -156,6 +156,16 @@ _datasets_available = False +_accelerate_available = importlib.util.find_spec("accelerate") is not None +_accelerate_version = "N/A" + +if _accelerate_available: + try: + _accelerate_version = importlib_metadata.version("accelerate") + except importlib_metadata.PackageNotFoundError: + _accelerate_available = False + + def is_transformers_available(): return _transformers_available @@ -196,6 +206,10 @@ def is_datasets_available(): return _datasets_available +def is_accelerate_available(): + return _accelerate_available + + # This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319 def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str): """ @@ -317,6 +331,11 @@ def is_timm_version(operation: str, version: str): `pip install datasets`. Please note that you may need to restart your runtime after installation. """ +ACCELERATE_IMPORT_ERROR = """ +{0} requires the accelerate library but it was not found in your environment. You can install it with pip: +`pip install accelerate`. Please note that you may need to restart your runtime after installation. +""" + BACKENDS_MAPPING = OrderedDict( [ ("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)), @@ -324,6 +343,7 @@ def is_timm_version(operation: str, version: str): ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)), ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)), ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)), + ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)), ] ) diff --git a/setup.py b/setup.py index dd98548018..ac4056c30d 100644 --- a/setup.py +++ b/setup.py @@ -18,10 +18,11 @@ "datasets>=1.4.0", "sentencepiece", "scipy", - "accelerate", # transformers 4.29 require accelerate for PyTorch + "onnx", ] TESTS_REQUIRE = [ + "accelerate", "pytest", "parameterized", "Pillow", @@ -39,11 +40,11 @@ QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"] EXTRAS_REQUIRE = { - "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"], - "openvino": ["openvino>=2023.3", "onnx", "onnxruntime", "nncf>=2.8.1"], + "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"], + "openvino": ["openvino>=2023.3", "nncf>=2.8.1"], "openvino-tokenizers": ["openvino-tokenizers[transformers]"], "nncf": ["nncf>=2.8.1"], - "ipex": ["intel-extension-for-pytorch", "onnx"], + "ipex": ["intel-extension-for-pytorch"], "diffusers": ["diffusers"], "quality": QUALITY_REQUIRE, "tests": TESTS_REQUIRE, diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 0ef89ec8b8..a33e0339f3 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -154,16 +154,16 @@ class OVWeightCompressionTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ( (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70), - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44), ) - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),) - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 365),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 385),) SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = ( - (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 14, 136), ) SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ( - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 46), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 44, 44), ) LOAD_IN_4_BITS_SCOPE = ( @@ -171,7 +171,7 @@ class OVWeightCompressionTest(unittest.TestCase): OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", dict(bits=4, sym=False, group_size=-1, ratio=0.8), - 16, + 14, ), ( OVModelForCausalLM, @@ -182,13 +182,13 @@ class OVWeightCompressionTest(unittest.TestCase): group_size=32, ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]}, ), - 6, + 4, ), ( OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True), - 22, + 18, ), ( OVModelForCausalLM, @@ -201,7 +201,7 @@ class OVWeightCompressionTest(unittest.TestCase): sensitivity_metric="mean_activation_magnitude", dataset="ptb", ), - 16, + 14, ), ( OVModelForCausalLM, @@ -215,7 +215,7 @@ class OVWeightCompressionTest(unittest.TestCase): dataset="ptb", awq=True, ), - 16, + 14, ), ) diff --git a/tests/openvino/test_stable_diffusion.py b/tests/openvino/test_stable_diffusion.py index d8cef2e027..ab6f6f21a6 100644 --- a/tests/openvino/test_stable_diffusion.py +++ b/tests/openvino/test_stable_diffusion.py @@ -28,7 +28,6 @@ from diffusers.utils import load_image from diffusers.utils.testing_utils import floats_tensor from openvino.runtime.ie_api import CompiledModel -from packaging.version import Version, parse from parameterized import parameterized from utils_tests import MODEL_NAMES, SEED @@ -46,13 +45,8 @@ OVModelVaeDecoder, OVModelVaeEncoder, ) -from optimum.onnxruntime import ( - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, -) -from optimum.utils.import_utils import _diffusers_version +from optimum.intel.utils.import_utils import is_diffusers_version +from optimum.utils.import_utils import is_onnxruntime_available F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} @@ -167,7 +161,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1): class OVStableDiffusionImg2ImgPipelineTest(OVStableDiffusionPipelineBaseTest): SUPPORTED_ARCHITECTURES = ("stable-diffusion",) MODEL_CLASS = OVStableDiffusionImg2ImgPipeline - ORT_MODEL_CLASS = ORTStableDiffusionImg2ImgPipeline TASK = "image-to-image" @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -298,11 +291,13 @@ def test_height_width_properties(self, model_arch: str): class OVStableDiffusionInpaintPipelineTest(OVStableDiffusionPipelineBaseTest): SUPPORTED_ARCHITECTURES = ("stable-diffusion",) MODEL_CLASS = OVStableDiffusionInpaintPipeline - ORT_MODEL_CLASS = ORTStableDiffusionInpaintPipeline TASK = "inpaint" @parameterized.expand(SUPPORTED_ARCHITECTURES) + @unittest.skipIf(not is_onnxruntime_available(), "this test requires onnxruntime") def test_compare_diffusers_pipeline(self, model_arch: str): + from optimum.onnxruntime import ORTStableDiffusionInpaintPipeline + model_id = MODEL_NAMES[model_arch] pipeline = self.MODEL_CLASS.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) batch_size, num_images, height, width = 1, 1, 64, 64 @@ -329,7 +324,7 @@ def test_compare_diffusers_pipeline(self, model_arch: str): outputs = pipeline(**inputs, latents=latents).images self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) - ort_pipeline = self.ORT_MODEL_CLASS.from_pretrained(model_id, export=True) + ort_pipeline = ORTStableDiffusionInpaintPipeline.from_pretrained(model_id, export=True) ort_outputs = ort_pipeline(**inputs, latents=latents).images self.assertTrue(np.allclose(outputs, ort_outputs, atol=1e-1)) @@ -358,7 +353,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1): class OVtableDiffusionXLPipelineTest(unittest.TestCase): SUPPORTED_ARCHITECTURES = ("stable-diffusion-xl",) MODEL_CLASS = OVStableDiffusionXLPipeline - ORT_MODEL_CLASS = ORTStableDiffusionXLPipeline PT_MODEL_CLASS = StableDiffusionXLPipeline TASK = "text-to-image" @@ -444,7 +438,6 @@ def test_num_images_per_prompt_static_model(self, model_arch: str): class OVStableDiffusionXLImg2ImgPipelineTest(unittest.TestCase): SUPPORTED_ARCHITECTURES = ("stable-diffusion-xl", "stable-diffusion-xl-refiner") MODEL_CLASS = OVStableDiffusionXLImg2ImgPipeline - ORT_MODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline PT_MODEL_CLASS = StableDiffusionXLImg2ImgPipeline TASK = "image-to-image" @@ -489,7 +482,7 @@ class OVLatentConsistencyModelPipelineTest(unittest.TestCase): TASK = "text-to-image" @parameterized.expand(SUPPORTED_ARCHITECTURES) - @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version") + @unittest.skipIf(is_diffusers_version("<=", "0.21.4"), "not supported with this diffusers version") def test_compare_to_diffusers(self, model_arch: str): ov_pipeline = self.MODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True, ov_config=F32_CONFIG) self.assertIsInstance(ov_pipeline.text_encoder, OVModelTextEncoder) @@ -532,7 +525,7 @@ def test_compare_to_diffusers(self, model_arch: str): self.assertEqual(pipeline.device.type, ov_pipeline.device) @parameterized.expand(SUPPORTED_ARCHITECTURES) - @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version") + @unittest.skipIf(is_diffusers_version("<=", "0.21.4"), "not supported with this diffusers version") def test_num_images_per_prompt_static_model(self, model_arch: str): model_id = MODEL_NAMES[model_arch] pipeline = self.MODEL_CLASS.from_pretrained(model_id, export=True, compile=False, dynamic_shapes=False) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 937c0bf3f5..80298faf2b 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -365,7 +365,7 @@ def tearDown(self): "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -376,7 +376,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -385,7 +385,7 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -397,7 +397,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=44, + expected_fake_quantize=34, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=48, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=48, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 8fabb34e38..04049172d3 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -102,12 +102,12 @@ SEED = 42 _ARCHITECTURES_TO_EXPECTED_INT8 = { - "bert": (70,), + "bert": (68,), "roberta": (68,), "albert": (84,), "vit": (64,), "blenderbot": (70,), - "gpt2": (46,), + "gpt2": (44,), "wav2vec2": (34,), "distilbert": (66,), "t5": (64, 104, 84), @@ -116,7 +116,7 @@ "stable-diffusion-xl-refiner": (366, 34, 42, 66), } -_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)} +_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 477)} def get_num_quantized_nodes(ov_model):