openvinotoolkit · DanLiu2Intel · May 26, 2025 · Jul 20, 2025 · Sep 28, 2025 · Nov 6, 2025
@@ -820,7 +820,7 @@ struct COMPILER_TYPE final : OptionBase<COMPILER_TYPE, ov::intel_npu::CompilerTy
     }
 
     static ov::intel_npu::CompilerType defaultValue() {
-        return ov::intel_npu::CompilerType::DRIVER;
+        return ov::intel_npu::CompilerType::PLUGIN;
     }
 
     static ov::intel_npu::CompilerType parse(std::string_view val) {

@@ -53,6 +53,10 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
         return _mutex;
     }
 
+    bool init_completed() const {  // read-only accessor; const so it is callable through const IGraph references
+        return _init_completed;
+    }
+
     virtual void set_last_submitted_event(const std::shared_ptr<Event>& event, size_t indexOfCommandList) = 0;
     virtual const std::shared_ptr<Event>& get_last_submitted_event(size_t indexOfCommandList) const = 0;
     virtual void resize_last_submitted_event(size_t batch) = 0;
@@ -68,6 +72,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
     // Used to protect zero pipeline creation in the graph. The pipeline should be created only once per graph when the
     // first inference starts running
     std::mutex _mutex;
+    bool _init_completed = false;
 };
 
 }  // namespace intel_npu
@@ -0,0 +1,57 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <optional>
+
+#include "compiler.h"
+#include "intel_npu/common/filtered_config.hpp"
+#include "intel_npu/icompiler.hpp"
+#include "openvino/core/except.hpp"
+
+namespace intel_npu {
+
+class VCLCompilerImpl final : public intel_npu::ICompiler {
+public:
+    VCLCompilerImpl();
+    ~VCLCompilerImpl() override;  // NOTE(review): class holds raw VCL handles; confirm copy/move are disabled
+    static const std::shared_ptr<VCLCompilerImpl> getInstance();
+
+    NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+
+    std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
+                                                                      const Config& config) const override;
+
+    NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
+                                          const Config& config,
+                                          size_t callNumber) const override;
+
+    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+
+    NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const override;
+
+    uint32_t get_version() const override;
+
+    std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
+                                                            const std::vector<uint8_t>& network,
+                                                            const intel_npu::Config& config) const override;
+
+    bool get_supported_options(std::vector<char>& options) const;
+
+    bool is_option_supported(const std::string& option, std::optional<std::string> optValue = std::nullopt) const;
+
+    std::shared_ptr<void> getLinkedLibrary() const;
+
+private:
+    vcl_log_handle_t _logHandle = nullptr;
+    vcl_compiler_handle_t _compilerHandle = nullptr;
+    vcl_compiler_properties_t _compilerProperties = {};  // value-initialized so no field is read uninitialized
+    vcl_version_info_t _vclVersion = {};
+    vcl_version_info_t _vclProfilingVersion = {};
+    Logger _logger;
+};
+
+}  // namespace intel_npu
@@ -0,0 +1,28 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "intel_npu/network_metadata.hpp"
+#include "openvino/core/model.hpp"
+
+namespace intel_npu {
+
+bool isInitMetadata(const NetworkMetadata& networkMetadata);
+
+/**
+ * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
+ * serialization.
+ * @details Constant nodes (weights) may carry the "WeightlessCacheAttribute" as metadata, that is, information
+ * regarding the offset of the weights within the binary file, as well as the original size and precision. This
+ * information is required within the "weights separation" flow, therefore this function is here to store it.
+ * @note Not calling this function in the weights separation flow would lead to this information being lost upon
+ * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
+ * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
+ * misinformed and lookups of weights offsets could fail.
+ *
+ * @param model Both source and target.
+ */
+void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model);
+}  // namespace intel_npu
@@ -40,13 +40,23 @@ class XmlSerializer : public ov::util::XmlSerializer {
                                   false,
                                   ov::element::dynamic,
                                   false),
+          m_base_constant_writer(std::ref(constant_write_handler)),
           m_weightless_constant_writer(weightless_constant_writer
                                            ? weightless_constant_writer
                                            : std::make_shared<WeightlessWriter>(constant_write_handler)) {}
 
 private:
+    /**
+     * @brief Toggles between the two writers.
+     */
     ov::util::ConstantWriter& get_constant_write_handler() override;
 
+    /**
+     * @brief Overridden in order to choose which weights writer will be used based on the occurrence of the
+     * "WeightsPointerAttribute".
+     */
+    bool append_node_attributes(ov::Node& node) override;
+
     std::unique_ptr<ov::util::XmlSerializer> make_visitor(pugi::xml_node& data,
                                                           const std::string& node_type_name,
                                                           ov::util::ConstantWriter& constant_write_handler,
@@ -56,7 +66,15 @@ class XmlSerializer : public ov::util::XmlSerializer {
                                                           ov::element::Type,
                                                           bool) const override;
 
+    /**
+     * @brief The base OV writer, which copies the weights into a dedicated buffer.
+     *
+     * @note Ideally, we would not require this writer at all. The current algorithm does not handle subgraphs properly,
+     * so falling back to copying a part of the weights is a temporary fix.
+     */
+    std::reference_wrapper<ov::util::ConstantWriter> m_base_constant_writer;
     std::shared_ptr<WeightlessWriter> m_weightless_constant_writer = nullptr;
+    bool m_use_weightless_writer = false;
 };
 
 /**

@@ -75,4 +75,7 @@ class ZeGraphExtWrappers {
     Logger _logger;
 };
 
+// Parses the query result string, formatted as <name_0><name_1><name_2>, into an unordered_set of strings
+std::unordered_set<std::string> parseQueryResult(std::vector<char>& data);
+
 }  // namespace intel_npu