Update NNCF requirement to 2.11; add scale-estimation option

huggingface · Jun 12, 2024 · 3ff19f7 · 3ff19f7
1 parent 0486d80
commit 3ff19f7
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 2 deletions.
diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
@@ -141,6 +141,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
             "case it will be skipped."
         ),
     )
+    optional_group.add_argument(
+        "--scale-estimation",
+        action="store_true",
+        default=None,
+        help=(
+            "Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original "
+            "and compressed layers. Providing a dataset is required to run scale estimation. Please note, that "
+            "applying scale estimation takes additional memory and time."
+        ),
+    )
     optional_group.add_argument(
         "--sensitivity-metric",
         type=str,
@@ -255,6 +265,7 @@ def run(self):
                     "num_samples": self.args.num_samples,
                     "quant_method": QuantizationMethod.AWQ if self.args.awq else None,
                     "sensitivity_metric": self.args.sensitivity_metric,
+                    "scale_estimation": self.args.scale_estimation,
                 }
 
             if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:

diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
@@ -173,6 +173,9 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
             The maximum number of samples composing the calibration dataset.
         quant_method (`str`, defaults to OVQuantizationMethod.DEFAULT):
             Weight compression method to apply.
+        scale_estimation (`bool`, *optional*):
+            Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
+            compressed layers. Providing a dataset is required to run scale estimation.
     """
 
     def __init__(
@@ -188,6 +191,7 @@ def __init__(
         ignored_scope: Optional[dict] = None,
         num_samples: Optional[int] = None,
         quant_method: Union[QuantizationMethod, OVQuantizationMethod] = OVQuantizationMethod.DEFAULT,
+        scale_estimation: bool = None,
         **kwargs,
     ):
         super().__init__(bits=bits, sym=sym, ignored_scope=ignored_scope, num_samples=num_samples)
@@ -199,6 +203,7 @@ def __init__(
         self.sensitivity_metric = sensitivity_metric
         self.quant_method = quant_method
         self.post_init()
+        self.scale_estimation = scale_estimation
 
     def post_init(self):
         r"""

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -823,6 +823,7 @@ def _weight_only_quantization(
         ignored_scope=config.get_ignored_scope_instance(),
         dataset=dataset,
         subset_size=config.num_samples if config.num_samples else 128,
+        scale_estimation=config.scale_estimation,
     )
 
 

diff --git a/setup.py b/setup.py
@@ -60,8 +60,8 @@
 
 EXTRAS_REQUIRE = {
     "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
-    "openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"],
-    "nncf": ["nncf>=2.10.0"],
+    "openvino": ["openvino>=2023.3", "nncf>=2.11.0", "openvino-tokenizers[transformers]"],
+    "nncf": ["nncf>=2.11.0"],
     "ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<=4.41.2"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
@@ -97,6 +97,13 @@ class OVCLIExportTestCase(unittest.TestCase):
             4,
             28,
         ),
+        (
+            "text-generation-with-past",
+            "llama_awq",
+            "int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ",
+            4,
+            28,
+        ),
     ]
 
     def _openvino_export(
@@ -218,6 +225,7 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
             self.assertEqual(expected_int8, num_int8)
             self.assertEqual(expected_int4, num_int4)
             self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
+            self.assertTrue("--scale-estimation" not in option or b"Applying Scale Estimation" in result.stdout)
 
     def test_exporters_cli_help(self):
         subprocess.run(

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -230,6 +230,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
                 quant_method=QuantizationMethod.AWQ,
+                scale_estimation=True
             ),
             16,
         ),