From c93c2e7ea990152ce862e0ca3596d483610ced57 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Tue, 7 Jan 2025 13:47:56 +0100
Subject: [PATCH 1/9] Fp8 implementation

---
 docs/source/openvino/export.mdx         |  9 ++++-----
 optimum/commands/export/openvino.py     |  6 +-----
 optimum/intel/openvino/configuration.py | 20 +++++++++++---------
 optimum/intel/openvino/quantization.py  | 13 ++++++++-----
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
index 12d82b3e29..83ab3315f5 100644
--- a/docs/source/openvino/export.mdx
+++ b/docs/source/openvino/export.mdx
@@ -31,7 +31,7 @@ Check out the help for more options:
 ```text
 usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
-                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8}]
+                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,fp8_e4m3,fp8_e5m2}]
                                    [--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
                                    [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
                                    [--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]
@@ -67,10 +67,9 @@ Optional arguments:
                         on your local machine arbitrary code present in the model repository.
   --weight-format {fp32,fp16,int8,int4,mxfp4,nf4}
                         The weight format of the exported model.
-  --quant-mode {int8}
+  --quant-mode {int8,fp8_e4m3,fp8_e5m2}
                         Quantization precision mode. This is used for applying full model quantization including
-                        activations. The only currently supported choice is 'int8' for int8 quantization of both
-                        weights and activations.
+                        activations.
   --library {transformers,diffusers,timm,sentence_transformers,open_clip}
                         The library used to load the model before export. If not provided, will attempt to infer the
                         local checkpoint's library
@@ -166,7 +165,7 @@ Models larger than 1 billion parameters are exported to the OpenVINO format with
 
-Besides weight-only quantization, you can also apply full model quantization including activations by setting `--quant-mode` to `int8`. This will quantize both weights and activations of Linear, Convolutional and some other layers to int8. Currently this is only supported for speech-to-text models. Please see example below.
+Besides weight-only quantization, you can also apply full model quantization including activations by setting `--quant-mode` to the preferred precision. This will quantize both weights and activations of Linear, Convolutional and some other layers to the selected mode. Currently this is only supported for speech-to-text models. Please see the example below.
 
 ```bash
 optimum-cli export openvino -m openai/whisper-large-v3-turbo --quant-mode int8 --dataset librispeech --num-samples 32 --smooth-quant-alpha 0.9 ./whisper-large-v3-turbo
diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 7a0b7d7f3b..67510e0bc8 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -78,11 +78,10 @@ def parse_args_openvino(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--quant-mode",
         type=str,
-        choices=["int8"],
+        choices=["int8", "fp8_e4m3", "fp8_e5m2"],
         default=None,
         help=(
             "Quantization precision mode. This is used for applying full model quantization including activations. "
-            "The only currently supported choice is 'int8' for int8 quantization of both weights and activations."
         ),
     )
     optional_group.add_argument(
@@ -365,9 +364,6 @@ def run(self):
             quantization_config["trust_remote_code"] = self.args.trust_remote_code
             ov_config = OVConfig(quantization_config=quantization_config)
         else:
-            if self.args.quant_mode != "int8":
-                raise ValueError("Only 'int8' quantization mode is currently supported.")
-
             quantization_config = {
                 "weight_format": self.args.quant_mode,
                 "activation_format": self.args.quant_mode,
diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
index 5bf0337f38..777c96dbc9 100644
--- a/optimum/intel/openvino/configuration.py
+++ b/optimum/intel/openvino/configuration.py
@@ -638,9 +638,9 @@ def __init__(
                 SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
                 reduces quantization error.
             weight_format (`str`, defaults to "int8"):
-                Data format weights are quantized to. Possible values: ['int8'].
+                Data format weights are quantized to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
             activation_format (`str`, defaults to "int8"):
-                Data format activations are compressed to. Possible values: ['int8'].
+                Data format activations are compressed to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
         """
         super().__init__(
             bits=bits,
@@ -681,11 +681,15 @@ def post_init(self):
                 f"SmoothQuant alpha parameter must be in range [0, 1], but found {self.smooth_quant_alpha}"
             )
 
-        if self.weight_format != "int8":
-            raise ValueError("Only 'int8' weight format is currently supported.")
-
-        if self.activation_format != "int8":
-            raise ValueError("Only 'int8' activation format is currently supported.")
+        if not self.sym:
+            if self.activation_format != "int8":
+                raise ValueError(
+                    f"Asymmetric quantization cannot be performed in {self.activation_format} activation format."
+                )
+            if self.weight_format != "int8":
+                raise ValueError(
+                    f"Asymmetric quantization cannot be performed in {self.weight_format} weight format."
+                )
 
 class OVConfig(BaseConfig):
@@ -713,8 +717,6 @@ def __init__(
         if self.quantization_config is not None:
             if isinstance(self.quantization_config, OVWeightQuantizationConfig):
                 self.dtype = self.quantization_config.weight_format
-            else:
-                self.dtype = "int8"
         else:
             self.dtype = dtype
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 962738e0e1..e8cfc5007f 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -458,11 +458,6 @@ def _quantize_ovbasemodel(
         if calibration_dataset is None:
             raise ValueError("Calibration dataset is required to run quantization.")
 
-        if quantization_config.weight_format != "int8":
-            raise ValueError("Only 'int8' weight format is currently supported.")
-        if quantization_config.activation_format != "int8":
-            raise ValueError("Only 'int8' activation format is currently supported.")
-
         # Quantize model(s)
         if isinstance(self.model, _OVModelForWhisper):
             self._quantize_whisper_model(quantization_config, calibration_dataset, **kwargs)
@@ -1071,6 +1066,14 @@ def _full_quantization(
             matmul=quantization_config.smooth_quant_alpha
         )
 
+    q_mode_map = {
+        "fp8_e4m3": nncf.QuantizationMode.FP8_E4M3,
+        "fp8_e5m2": nncf.QuantizationMode.FP8_E5M2,
+    }
+
+    if quantization_config.activation_format in q_mode_map:
+        kwargs.update({"mode": q_mode_map[quantization_config.activation_format]})
+
     quantized_model = nncf.quantize(
         model,
         calibration_dataset,
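To make the new code path concrete: once `--quant-mode fp8_e4m3` reaches `_full_quantization`, the change above amounts to forwarding an `nncf.QuantizationMode` into `nncf.quantize`. A minimal standalone sketch — the model path and calibration inputs are placeholders, not part of this patch:

```python
import numpy as np
import nncf
import openvino as ov

# Read any OpenVINO IR model; "model.xml" is a placeholder path.
model = ov.Core().read_model("model.xml")

# A toy calibration set: an iterable of example inputs wrapped in nncf.Dataset.
calibration_dataset = nncf.Dataset(
    [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(4)]
)

# Mirrors the q_mode_map added in _full_quantization: the CLI string is mapped
# to an nncf.QuantizationMode and passed to nncf.quantize via `mode`.
q_mode_map = {
    "fp8_e4m3": nncf.QuantizationMode.FP8_E4M3,
    "fp8_e5m2": nncf.QuantizationMode.FP8_E5M2,
}
quantized_model = nncf.quantize(model, calibration_dataset, mode=q_mode_map["fp8_e4m3"])
```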
From 44f11a7bd89474c55ddf6613838ba7b0976d0d4a Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Tue, 7 Jan 2025 16:34:15 +0100
Subject: [PATCH 2/9] All datasets support

---
 optimum/intel/openvino/configuration.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
index 777c96dbc9..fbaa450949 100644
--- a/optimum/intel/openvino/configuration.py
+++ b/optimum/intel/openvino/configuration.py
@@ -669,13 +669,6 @@ def post_init(self):
         if self.bits != 8:
             raise ValueError(f"Only support 8-bit for static quantization but found {self.bits}")
 
-        if self.dataset is not None:
-            if self.dataset not in PREDEFINED_SPEECH_TO_TEXT_DATASETS:
-                raise ValueError(
-                    f"You have entered the following string value for dataset: {self.dataset}. But it is not supported."
-                    f" Currently you can only choose {list(PREDEFINED_SPEECH_TO_TEXT_DATASETS.keys())}."
-                )
-
         if self.smooth_quant_alpha is not None and not (0 <= self.smooth_quant_alpha <= 1):
             raise ValueError(
                 f"SmoothQuant alpha parameter must be in range [0, 1], but found {self.smooth_quant_alpha}"
             )
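With the predefined-dataset check gone, any dataset identifier supported downstream can now reach calibration. Assuming the class touched here is `OVQuantizationConfig` (the diff shows only the method bodies, so the class name is an inference), a config like the following would previously have been rejected in `post_init` and now passes validation:

```python
from optimum.intel import OVQuantizationConfig

# "wikitext2" is not in PREDEFINED_SPEECH_TO_TEXT_DATASETS; before this patch
# post_init() raised a ValueError for it, after the patch it is accepted.
config = OVQuantizationConfig(bits=8, dataset="wikitext2", num_samples=32)
```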
From b54abf1953034c0f4ad0569e5b1ed9ae849c4122 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 12:12:01 +0100
Subject: [PATCH 3/9] Added test

---
 docs/source/openvino/export.mdx         |  4 ++--
 optimum/commands/export/openvino.py     |  2 +-
 optimum/intel/openvino/configuration.py |  6 ++---
 optimum/intel/openvino/quantization.py  |  4 ++--
 tests/openvino/test_exporters_cli.py    | 20 ++++++++++++----
 tests/openvino/utils_tests.py           | 32 ++++++++++++-------------
 6 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
index 83ab3315f5..3762febb6c 100644
--- a/docs/source/openvino/export.mdx
+++ b/docs/source/openvino/export.mdx
@@ -31,7 +31,7 @@ Check out the help for more options:
 ```text
 usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
-                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,fp8_e4m3,fp8_e5m2}]
+                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,f8e4m3,f8e5m2}]
                                    [--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
                                    [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
                                    [--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]
@@ -67,7 +67,7 @@ Optional arguments:
                         on your local machine arbitrary code present in the model repository.
   --weight-format {fp32,fp16,int8,int4,mxfp4,nf4}
                         The weight format of the exported model.
-  --quant-mode {int8,fp8_e4m3,fp8_e5m2}
+  --quant-mode {int8,f8e4m3,f8e5m2}
                         Quantization precision mode. This is used for applying full model quantization including
                         activations.
   --library {transformers,diffusers,timm,sentence_transformers,open_clip}
diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 67510e0bc8..20e2d7ca33 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -78,7 +78,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--quant-mode",
         type=str,
-        choices=["int8", "fp8_e4m3", "fp8_e5m2"],
+        choices=["int8", "f8e4m3", "f8e5m2"],
        default=None,
         help=(
             "Quantization precision mode. This is used for applying full model quantization including activations. "
diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
index fbaa450949..cfe2c9c60e 100644
--- a/optimum/intel/openvino/configuration.py
+++ b/optimum/intel/openvino/configuration.py
@@ -26,7 +26,7 @@
 from optimum.configuration_utils import BaseConfig
 
 from ..utils.import_utils import is_nncf_available
-from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_SPEECH_TO_TEXT_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
+from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
 
 if is_nncf_available():
@@ -638,9 +638,9 @@ def __init__(
                 SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
                 reduces quantization error.
             weight_format (`str`, defaults to "int8"):
-                Data format weights are quantized to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+                Data format weights are quantized to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
             activation_format (`str`, defaults to "int8"):
-                Data format activations are compressed to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+                Data format activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         """
         super().__init__(
             bits=bits,
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index e8cfc5007f..5f780cd3a7 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -1067,8 +1067,8 @@ def _full_quantization(
     q_mode_map = {
-        "fp8_e4m3": nncf.QuantizationMode.FP8_E4M3,
-        "fp8_e5m2": nncf.QuantizationMode.FP8_E5M2,
+        "f8e4m3": nncf.QuantizationMode.FP8_E4M3,
+        "f8e5m2": nncf.QuantizationMode.FP8_E5M2,
     }
 
     if quantization_config.activation_format in q_mode_map:
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index f03b4fbc57..02541c2493 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -114,7 +114,16 @@ class OVCLIExportTestCase(unittest.TestCase):
         (
             "automatic-speech-recognition",
             "whisper",
-            "--quant-mode int8 --dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            "int8",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
+            (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
+        ),
+        (
+            "automatic-speech-recognition",
+            "whisper",
+            "f8e4m3",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
             (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
             (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
         ),
@@ -407,13 +416,14 @@ def test_exporters_cli_full_quantization(
         self,
         task: str,
         model_type: str,
+        quant_mode: str,
         option: str,
         expected_num_fq_nodes_per_model: Tuple[int],
         expected_num_weight_nodes_per_model: Tuple[int],
     ):
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
-                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} {option} {tmpdir}",
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --quant-mode {quant_mode} {option} {tmpdir}",
                 shell=True,
                 check=True,
             )
@@ -424,9 +434,9 @@ def test_exporters_cli_full_quantization(
             submodels = [model.encoder, model.decoder, model.decoder_with_past]
         self.assertEqual(len(expected_num_fq_nodes_per_model), len(submodels))
         for i, model in enumerate(submodels):
-            actual_num_fq_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
-            self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_fq_nodes)
-            self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes["int8"])
+            actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
+            self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
+            self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 02b81bfdc5..eb8406e9ae 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -203,29 +203,29 @@ def get_num_quantized_nodes(model):
     num_fake_quantize = 0
-    num_weight_nodes = {
-        "int8": 0,
-        "int4": 0,
-        "f4e2m1": 0,
-        "f8e8m0": 0,
-        "nf4": 0,
-    }
+    types_map = {
+        "i8": "int8",
+        "u8": "int8",
+        "i4": "int4",
+        "u4": "int4",
+        "f4e2m1": "f4e2m1",
+        "f8e8m0": "f8e8m0",
+        "nf4": "nf4",
+        "f8e4m3": "f8e4m3",
+        "f8e5m2": "f8e5m2",
+    }
+    num_weight_nodes = {n: 0 for n in types_map.values()}
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
             num_fake_quantize += 1
+        elif "FakeConvert" in elem.name:
+            num_fake_quantize += 1
         for i in range(elem.get_output_size()):
             type_name = elem.get_output_element_type(i).get_type_name()
-            if type_name in ["i8", "u8"]:
-                num_weight_nodes["int8"] += 1
-            if type_name in ["i4", "u4"]:
-                num_weight_nodes["int4"] += 1
-            if type_name == "f4e2m1":
-                num_weight_nodes["f4e2m1"] += 1
-            if type_name == "f8e8m0":
-                num_weight_nodes["f8e8m0"] += 1
-            if type_name == "nf4":
-                num_weight_nodes["nf4"] += 1
+            if type_name in types_map:
+                name = types_map[type_name]
+                num_weight_nodes[name] += 1
     return num_fake_quantize, num_weight_nodes
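As background on the reworked test helper: int8 quantization inserts `FakeQuantize` operations into the graph, while the FP8 modes insert `FakeConvert` operations, and weight precisions can be read off the output element types. A self-contained sketch of the same counting idea (an illustration, not the exact helper):

```python
import openvino as ov

# Element-type names as reported by OpenVINO, bucketed the way the tests do.
TYPES_MAP = {"i8": "int8", "u8": "int8", "i4": "int4", "u4": "int4", "f8e4m3": "f8e4m3", "f8e5m2": "f8e5m2"}

def count_quantized_nodes(ov_model: ov.Model):
    num_fake_nodes = 0
    num_weight_nodes = {name: 0 for name in TYPES_MAP.values()}
    for op in ov_model.get_ops():
        # int8 shows up as FakeQuantize ops, fp8 as FakeConvert ops.
        if "FakeQuantize" in op.get_friendly_name() or "FakeConvert" in op.get_friendly_name():
            num_fake_nodes += 1
        for i in range(op.get_output_size()):
            type_name = op.get_output_element_type(i).get_type_name()
            if type_name in TYPES_MAP:
                num_weight_nodes[TYPES_MAP[type_name]] += 1
    return num_fake_nodes, num_weight_nodes
```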
From 6f5cd5bc079318cffbe42b1e1ae4b77c55368fe3 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 13:42:25 +0100
Subject: [PATCH 4/9] Update test

---
 tests/openvino/test_exporters_cli.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 02541c2493..310f9a596b 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -120,12 +120,12 @@ class OVCLIExportTestCase(unittest.TestCase):
             (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
         ),
         (
-            "automatic-speech-recognition",
-            "whisper",
+            "text-generation",
+            "phi3",
             "f8e4m3",
-            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
-            (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
-            (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
+            "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
+            (13,),
+            (10,),
         ),
     ]
@@ -429,11 +429,11 @@ def test_exporters_cli_full_quantization(
         )
         model = eval(_HEAD_TO_AUTOMODELS[task]).from_pretrained(tmpdir)
 
-        submodels = []
+        models = [model]
         if task == "automatic-speech-recognition":
-            submodels = [model.encoder, model.decoder, model.decoder_with_past]
-        self.assertEqual(len(expected_num_fq_nodes_per_model), len(submodels))
-        for i, model in enumerate(submodels):
+            models = [model.encoder, model.decoder, model.decoder_with_past]
+        self.assertEqual(len(expected_num_fq_nodes_per_model), len(models))
+        for i, model in enumerate(models):
             actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
             self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
             self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
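The new test row drives FP8 full quantization of a text-generation model through the CLI; a rough Python-API equivalent is sketched below. The model id is a placeholder and the keyword names follow the quantization config touched in patch 1, so treat this as an assumption rather than a verified recipe:

```python
from optimum.intel import OVModelForCausalLM, OVQuantizationConfig

# Full f8e4m3 quantization of a small causal LM, mirroring
# `--quant-mode f8e4m3 --dataset wikitext2 --smooth-quant-alpha 0.9 --sym`.
qconfig = OVQuantizationConfig(
    weight_format="f8e4m3",
    activation_format="f8e4m3",
    dataset="wikitext2",
    num_samples=1,
    smooth_quant_alpha=0.9,
    sym=True,
)
model = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-LlamaForCausalLM",  # placeholder checkpoint
    export=True,
    quantization_config=qconfig,
)
model.save_pretrained("./llama-f8e4m3")
```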
From ac7b57a6bf50f7fb9a4f486365eab0725cf6ab77 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 13:43:48 +0100
Subject: [PATCH 5/9] Correctness

---
 tests/openvino/utils_tests.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index eb8406e9ae..760c98bbb4 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -202,7 +202,7 @@
 
 def get_num_quantized_nodes(model):
-    num_fake_quantize = 0
+    num_fake_nodes = 0
     types_map = {
         "i8": "int8",
         "u8": "int8",
@@ -218,15 +218,15 @@ def get_num_quantized_nodes(model):
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
-            num_fake_quantize += 1
+            num_fake_nodes += 1
         elif "FakeConvert" in elem.name:
-            num_fake_quantize += 1
+            num_fake_nodes += 1
         for i in range(elem.get_output_size()):
             type_name = elem.get_output_element_type(i).get_type_name()
             if type_name in types_map:
                 name = types_map[type_name]
                 num_weight_nodes[name] += 1
-    return num_fake_quantize, num_weight_nodes
+    return num_fake_nodes, num_weight_nodes

From 2df7fc4e65153fd97d93f8b0dc586faf77e7d96e Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 13:44:45 +0100
Subject: [PATCH 6/9] Correctness

---
 tests/openvino/utils_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 760c98bbb4..ce6231fb5f 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -219,7 +219,7 @@ def get_num_quantized_nodes(model):
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
             num_fake_nodes += 1
-        elif "FakeConvert" in elem.name:
+        if "FakeConvert" in elem.name:
             num_fake_nodes += 1
         for i in range(elem.get_output_size()):
             type_name = elem.get_output_element_type(i).get_type_name()
From 710f50ac28b57a4287cd8a64759f76286e24e74d Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 16:09:21 +0100
Subject: [PATCH 7/9] Update docs/source/openvino/export.mdx

Co-authored-by: Alexander Kozlov
---
 docs/source/openvino/export.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
index 3762febb6c..1d0c534193 100644
--- a/docs/source/openvino/export.mdx
+++ b/docs/source/openvino/export.mdx
@@ -165,7 +165,7 @@ Models larger than 1 billion parameters are exported to the OpenVINO format with
 
-Besides weight-only quantization, you can also apply full model quantization including activations by setting `--quant-mode` to the preferred precision. This will quantize both weights and activations of Linear, Convolutional and some other layers to the selected mode. Currently this is only supported for speech-to-text models. Please see the example below.
+Besides weight-only quantization, you can also apply full model quantization including activations by setting `--quant-mode` to the preferred precision. This will quantize both weights and activations of Linear, Convolutional and some other layers to the selected mode. Please see the example below.
 
 ```bash
 optimum-cli export openvino -m openai/whisper-large-v3-turbo --quant-mode int8 --dataset librispeech --num-samples 32 --smooth-quant-alpha 0.9 ./whisper-large-v3-turbo
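After an FP8 export, the effect is straightforward to verify on the saved IR: the graph should contain `FakeConvert` operations rather than the `FakeQuantize` ones int8 produces. A quick hedged check — the directory is the placeholder output of the sketch shown after patch 4:

```python
from optimum.intel import OVModelForCausalLM

# Load the exported model; model.model is the underlying ov.Model.
model = OVModelForCausalLM.from_pretrained("./llama-f8e4m3")  # placeholder path
fake_converts = [
    op for op in model.model.get_ops() if "FakeConvert" in op.get_friendly_name()
]
print(f"FakeConvert ops found: {len(fake_converts)}")
```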
From 3174ef02d5b9bcda27aff4ceaaec3ae392c1e018 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Wed, 8 Jan 2025 17:09:55 +0100
Subject: [PATCH 8/9] Change test model

---
 tests/openvino/test_exporters_cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 310f9a596b..3d31e35943 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -121,11 +121,11 @@ class OVCLIExportTestCase(unittest.TestCase):
         ),
         (
             "text-generation",
-            "phi3",
+            "llama",
             "f8e4m3",
             "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
             (13,),
-            (10,),
+            (16,),
         ),
     ]

From 0a8e3e77f3914176ce0bcab79652585dffb1c849 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Tue, 14 Jan 2025 10:12:02 +0100
Subject: [PATCH 9/9] Apply comments

---
 optimum/intel/openvino/configuration.py | 20 ++++++++------------
 tests/openvino/test_exporters_cli.py    |  8 ++++----
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
index cfe2c9c60e..cb09110b61 100644
--- a/optimum/intel/openvino/configuration.py
+++ b/optimum/intel/openvino/configuration.py
@@ -658,6 +658,13 @@ def __init__(
         self.overflow_fix = overflow_fix
         self.smooth_quant_alpha = smooth_quant_alpha
         self.activation_format = activation_format
+
+        f8_formats = ["f8e4m3", "f8e5m2"]
+        if self.activation_format in f8_formats and self.weight_format in f8_formats:
+            logger.info(
+                f"{self.activation_format} for activations and {self.weight_format} weights were found. A symmetric scheme will be used."
+            )
+            self.sym = True
         self.post_init()
 
     def post_init(self):
@@ -674,16 +681,6 @@ def post_init(self):
                 f"SmoothQuant alpha parameter must be in range [0, 1], but found {self.smooth_quant_alpha}"
             )
 
-        if not self.sym:
-            if self.activation_format != "int8":
-                raise ValueError(
-                    f"Asymmetric quantization cannot be performed in {self.activation_format} activation format."
-                )
-            if self.weight_format != "int8":
-                raise ValueError(
-                    f"Asymmetric quantization cannot be performed in {self.weight_format} weight format."
-                )
-
 
 class OVConfig(BaseConfig):
     CONFIG_NAME = "openvino_config.json"
@@ -708,8 +705,7 @@ def __init__(
             "compression", None
         )  # A field for backward-compatability of training-time compression parameters
         if self.quantization_config is not None:
-            if isinstance(self.quantization_config, OVWeightQuantizationConfig):
-                self.dtype = self.quantization_config.weight_format
+            self.dtype = self.quantization_config.weight_format
         else:
             self.dtype = dtype
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 3d31e35943..840c6d4eb3 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -123,7 +123,7 @@ class OVCLIExportTestCase(unittest.TestCase):
             "text-generation",
             "llama",
             "f8e4m3",
-            "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
+            "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
             (13,),
             (16,),
         ),
@@ -418,7 +418,7 @@ def test_exporters_cli_full_quantization(
         self,
         task: str,
         model_type: str,
         quant_mode: str,
         option: str,
-        expected_num_fq_nodes_per_model: Tuple[int],
+        expected_num_f_nodes_per_model: Tuple[int],
         expected_num_weight_nodes_per_model: Tuple[int],
     ):
         with TemporaryDirectory() as tmpdir:
@@ -432,10 +432,10 @@ def test_exporters_cli_full_quantization(
         models = [model]
         if task == "automatic-speech-recognition":
             models = [model.encoder, model.decoder, model.decoder_with_past]
-        self.assertEqual(len(expected_num_fq_nodes_per_model), len(models))
+        self.assertEqual(len(expected_num_f_nodes_per_model), len(models))
         for i, model in enumerate(models):
             actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
-            self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
+            self.assertEqual(expected_num_f_nodes_per_model[i], actual_num_f_nodes)
             self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
 
     def test_exporters_cli_int4_with_local_model_and_default_config(self):