Skip to content

Commit

Permalink
Update NNCF requirement to 2.11; add scale-estimation option
Browse files: browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Jun 12, 2024
1 parent 0486d80 commit 3ff19f7
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 2 deletions.
11 changes: 11 additions & 0 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
"case it will be skipped."
),
)
optional_group.add_argument(
"--scale-estimation",
action="store_true",
default=None,
help=(
"Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original "
"and compressed layers. Providing a dataset is required to run scale estimation. Please note, that "
"applying scale estimation takes additional memory and time."
),
)
optional_group.add_argument(
"--sensitivity-metric",
type=str,
Expand Down Expand Up @@ -255,6 +265,7 @@ def run(self):
"num_samples": self.args.num_samples,
"quant_method": QuantizationMethod.AWQ if self.args.awq else None,
"sensitivity_metric": self.args.sensitivity_metric,
"scale_estimation": self.args.scale_estimation,
}

if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
Expand Down
5 changes: 5 additions & 0 deletions optimum/intel/openvino/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
The maximum number of samples composing the calibration dataset.
quant_method (`str`, defaults to `OVQuantizationMethod.DEFAULT`):
Weight compression method to apply.
scale_estimation (`bool`, *optional*):
Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
compressed layers. Providing a dataset is required to run scale estimation.
"""

def __init__(
Expand All @@ -188,6 +191,7 @@ def __init__(
ignored_scope: Optional[dict] = None,
num_samples: Optional[int] = None,
quant_method: Union[QuantizationMethod, OVQuantizationMethod] = OVQuantizationMethod.DEFAULT,
scale_estimation: bool = None,
**kwargs,
):
super().__init__(bits=bits, sym=sym, ignored_scope=ignored_scope, num_samples=num_samples)
Expand All @@ -199,6 +203,7 @@ def __init__(
self.sensitivity_metric = sensitivity_metric
self.quant_method = quant_method
self.post_init()
self.scale_estimation = scale_estimation

def post_init(self):
r"""
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,7 @@ def _weight_only_quantization(
ignored_scope=config.get_ignored_scope_instance(),
dataset=dataset,
subset_size=config.num_samples if config.num_samples else 128,
scale_estimation=config.scale_estimation,
)


Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@

EXTRAS_REQUIRE = {
"neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
"openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.10.0"],
"openvino": ["openvino>=2023.3", "nncf>=2.11.0", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.11.0"],
"ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<=4.41.2"],
"diffusers": ["diffusers"],
"quality": QUALITY_REQUIRE,
Expand Down
8 changes: 8 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ class OVCLIExportTestCase(unittest.TestCase):
4,
28,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ",
4,
28,
),
]

def _openvino_export(
Expand Down Expand Up @@ -218,6 +225,7 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
self.assertEqual(expected_int8, num_int8)
self.assertEqual(expected_int4, num_int4)
self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
self.assertTrue("--scale-estimation" not in option or b"Applying Scale Estimation" in result.stdout)

def test_exporters_cli_help(self):
subprocess.run(
Expand Down
1 change: 1 addition & 0 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ class OVWeightCompressionTest(unittest.TestCase):
sensitivity_metric="mean_activation_magnitude",
dataset="ptb",
quant_method=QuantizationMethod.AWQ,
scale_estimation=True
),
16,
),
Expand Down

0 comments on commit 3ff19f7

Please sign in to comment.