diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 30dfe5ae6f..eb233f3d15 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -33,18 +33,25 @@ logger = logging.getLogger(__name__) _DEFAULT_4BIT_CONFIGS = { - "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.5}, + "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, "EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64}, "facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8}, "bigscience/bloomz-7b1": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.6}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"bits": 4, "sym": False, "group_size": 128}, - "HuggingFaceH4/zephyr-7b-beta": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.6}, - "meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, + "HuggingFaceH4/zephyr-7b-beta": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 0.8, + "dataset": "wikitext2", + "awq": True, + }, + "meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7}, "meta-llama/Llama-2-7b-chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, "meta-llama/Llama-2-13b-chat": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8}, "stabilityai/stablelm-3b-4e1t": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8}, "stablelm-epoch-3b-preview": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8}, - "stable-zephyr-3b-dpo": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8}, + "stabilityai/stablelm-zephyr-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0}, "pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72}, "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, @@ -52,6 +59,19 @@ "tiiuae/falcon-7b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, "psmathur/orca_mini_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}, "mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, + "facebook/opt-2.7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7}, + "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, + "lmsys/vicuna-7b-v1.5": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0}, + "stabilityai/stablelm-tuned-alpha-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8}, + "mistralai/Mistral-7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.9}, + "baichuan-inc/Baichuan2-7B-Chat": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 0.8, + "dataset": "wikitext2", + "awq": True, + }, }