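Update DQ query: read compiler dynamic-quantization support from the dedicated NPU property (ov::intel_npu::compiler_dynamic_quantization) instead of searching the device capabilities list, and add NPU_COMPILATION_MODE_PARAMS to the default prefill/generate configs when it is supported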

smirnov-alexey · smirnov-alexey · commit ba70ef19c167 · 2025-01-09T11:59:39.000Z
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
@@ -475,13 +475,7 @@ std::optional<NPUDesc> extract_npu_descriptor(ov::Core& core) {
     }
     const auto arch = core.get_property("NPU", ov::device::architecture);
     const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles);
-
-    bool compiler_dq = false;
-    const auto device_caps = core.get_property("NPU", ov::device::capabilities);
-    if (std::find(device_caps.begin(), device_caps.end(),
-                  "COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) {
-        compiler_dq = true;
-    }
+    const auto compiler_dq = core.get_property("NPU", ov::intel_npu::compiler_dynamic_quantization);
     return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});
 }
 
@@ -526,6 +520,7 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
     }
     if (npudesc.has_value() && npudesc->compiler_dq) {
         config.emplace("NPUW_DQ_FULL", "NO");
+        config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");
     }
     return config;
 }
@@ -547,6 +542,7 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
     }
     if (npudesc.has_value() && npudesc->compiler_dq) {
         config.emplace("NPUW_DQ_FULL", "NO");
+        config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");
     }
     return config;
 }

Original file line number	Diff line number	Diff line change
`@@ -475,13 +475,7 @@ std::optional<NPUDesc> extract_npu_descriptor(ov::Core& core) {`
`475`	`475`	`}`
`476`	`476`	`const auto arch = core.get_property("NPU", ov::device::architecture);`
`477`	`477`	`const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles);`
`478`		`-`
`479`		`- bool compiler_dq = false;`
`480`		`- const auto device_caps = core.get_property("NPU", ov::device::capabilities);`
`481`		`- if (std::find(device_caps.begin(), device_caps.end(),`
`482`		`- "COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) {`
`483`		`- compiler_dq = true;`
`484`		`- }`
	`478`	`+ const auto compiler_dq = core.get_property("NPU", ov::intel_npu::compiler_dynamic_quantization);`
`485`	`479`	`return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});`
`486`	`480`	`}`
`487`	`481`
`@@ -526,6 +520,7 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,`
`526`	`520`	`}`
`527`	`521`	`if (npudesc.has_value() && npudesc->compiler_dq) {`
`528`	`522`	`config.emplace("NPUW_DQ_FULL", "NO");`
	`523`	`+ config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");`
`529`	`524`	`}`
`530`	`525`	`return config;`
`531`	`526`	`}`
`@@ -547,6 +542,7 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,`
`547`	`542`	`}`
`548`	`543`	`if (npudesc.has_value() && npudesc->compiler_dq) {`
`549`	`544`	`config.emplace("NPUW_DQ_FULL", "NO");`
	`545`	`+ config.emplace("NPU_COMPILATION_MODE_PARAMS", "enable-weights-dynamic-dequantization=true");`
`550`	`546`	`}`
`551`	`547`	`return config;`
`552`	`548`	`}`