diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 5abb518183..98d5ecbb5b 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -32,11 +32,17 @@ logger = logging.getLogger(__name__) + +class OVQuantizationMethod(str, Enum): + DEFAULT = "default" + HYBRID = "hybrid" + AWQ = "awq" + + _DEFAULT_4BIT_CONFIGS = { - "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, + "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "scale_estimation": True}, "EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64}, "facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8}, - "bigscience/bloomz-7b1": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.6}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"bits": 4, "sym": False, "group_size": 128}, "HuggingFaceH4/zephyr-7b-beta": { "bits": 4, @@ -44,7 +50,7 @@ "group_size": 128, "ratio": 0.8, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, "meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, "meta-llama/Llama-2-7b-chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, @@ -55,7 +61,7 @@ "group_size": 64, "ratio": 0.8, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, "stabilityai/stablelm-zephyr-3b": { "bits": 4, @@ -63,13 +69,13 @@ "group_size": 128, "ratio": 1.0, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, "stabilityai/stable-code-3b": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8}, "pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72}, "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, - "openlm-research/open_llama_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, + "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True}, "openlm-research/open_llama_3b_v2": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, "tiiuae/falcon-7b-instruct": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, "psmathur/orca_mini_3b": { @@ -78,7 +84,7 @@ "group_size": 64, "all_layers": True, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, "bigscience/bloomz-560m": { "bits": 4, @@ -86,11 +92,16 @@ "group_size": 64, "ratio": 0.8, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, "mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, "facebook/opt-2.7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7}, - "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, + "togethercomputer/RedPajama-INCITE-Chat-3B-v1": { + "bits": 4, + "sym": False, + "group_size": 128, + "scale_estimation": True, + }, "lmsys/vicuna-7b-v1.5": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0}, "stabilityai/stablelm-tuned-alpha-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, "mistralai/Mistral-7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.9}, @@ -100,8 +111,20 @@ "group_size": 128, "ratio": 0.8, "dataset": "wikitext2", - "awq": True, + "quant_method": OVQuantizationMethod.AWQ, }, + "openai-community/gpt2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.5, "scale_estimation": True}, + "lmsys/longchat-7b-16k": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, + "bigcode/starcoder2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, + "TinyLlama/TinyLlama-1.1B-Chat-v1.0": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, + "stabilityai/stablelm-tuned-alpha-7b": { + "bits": 4, + "sym": False, + "group_size": 128, + "ratio": 0.6, + "scale_estimation": True, + }, + "microsoft/phi-2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9}, } _DEFAULT_4BIT_CONFIG = { @@ -113,12 +136,6 @@ } -class OVQuantizationMethod(str, Enum): - DEFAULT = "default" - HYBRID = "hybrid" - AWQ = "awq" - - @dataclass class OVQuantizationConfigBase(QuantizationConfigMixin): """ diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index f879dc7880..e36ea47df2 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -61,7 +61,13 @@ OVWeightQuantizationConfig, OVDynamicQuantizationConfig, ) -from optimum.intel.openvino.configuration import OVQuantizationMethod, OVQuantizationConfigBase +from optimum.intel.openvino.configuration import ( + OVQuantizationMethod, + OVQuantizationConfigBase, + _DEFAULT_4BIT_CONFIGS, + _DEFAULT_4BIT_CONFIG, +) +from copy import deepcopy from optimum.intel.openvino.quantization import InferRequestWrapper from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version @@ -820,6 +826,13 @@ class OVQuantizationConfigTest(unittest.TestCase): (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None), ) + def get_default_configurations() -> dict: + default_configurations = deepcopy(_DEFAULT_4BIT_CONFIGS) + default_configurations.update({"default": _DEFAULT_4BIT_CONFIG}) + return default_configurations + + DEFAULT_CONFIGURATIONS = get_default_configurations() + @parameterized.expand(QUANTIZATION_CONFIGS) def test_config_serialization(self, quantization_config: OVQuantizationConfigBase): ov_config = OVConfig(quantization_config=quantization_config) @@ -849,6 +862,14 @@ def test_config_from_dict(self, quantization_config: dict, config_type: type, wa if hasattr(ov_config.quantization_config, k): self.assertEqual(getattr(ov_config.quantization_config, k), v) + @parameterized.expand(DEFAULT_CONFIGURATIONS) + def test_named_default_configurations(self, config_id: str): + custom_configuration = self.DEFAULT_CONFIGURATIONS[config_id] + prepared_config = OVModelForCausalLM._prepare_weight_quantization_config(custom_configuration) + for field_name, reference_value in custom_configuration.items(): + value = prepared_config.__getattribute__(field_name) + self.assertEqual(value, reference_value) + class InferRequestWrapperTest(unittest.TestCase): MODEL_ID = ("openai/whisper-tiny.en",)