Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7de1d64
Add VCLApi and VCLCompilerImpl
XinWangIntel May 26, 2025
3aad6d8
Use vclAllocateExecutionCreate3 to get metadata
XinWangIntel Jul 20, 2025
cc62686
update vcl version check, default compilertype and vcl download link
DanLiu2Intel Sep 28, 2025
8b11ee8
fix export issue and build
DanLiu2Intel Nov 6, 2025
a08feb7
clang-format, add platform update and compiler check
DanLiu2Intel Nov 12, 2025
cbb8de0
remove ENABLE_VCL_FOR_COMPILER and update tile, default compilertype
DanLiu2Intel Nov 13, 2025
047aab1
set device desc empty and auto parse in compile
DanLiu2Intel Nov 13, 2025
baedc2b
update vcl compiler to openvino_intel_npu_compiler
DanLiu2Intel Nov 17, 2025
0338cfe
Merge branch 'master' into serialize-on-plugin-adapter
DanLiu2Intel Nov 17, 2025
8f3f570
fix ie mdk issue for compilerType Inconsistency issues for 3720
DanLiu2Intel Nov 17, 2025
4f1aa39
fix clang-format
DanLiu2Intel Nov 17, 2025
1a60dfa
fix unit test
DanLiu2Intel Nov 17, 2025
21ea396
fix SERIALIZATION_WEIGHTS_SIZE_THRESHOLD undeclared identifiers
DanLiu2Intel Nov 18, 2025
da62ff8
revert namespace for SERIALIZATION_WEIGHTS_SIZE_THRESHOLD
DanLiu2Intel Nov 18, 2025
3bb783d
Merge branch 'master' into serialize-on-plugin-adapter
DanLiu2Intel Nov 18, 2025
800c78e
remove USE_BASE_MODEL_SERIALIZER option
DanLiu2Intel Nov 18, 2025
acafdc6
fix 3720 platform compilerType issue and metadata name issue
DanLiu2Intel Nov 18, 2025
b0c6d8a
Merge branch 'master' into serialize-on-plugin-adapter
DanLiu2Intel Nov 19, 2025
5ab6d0d
Merge branch 'master' into serialize-on-plugin-adapter
DanLiu2Intel Nov 21, 2025
0c9ae28
update to use vcl serializer
DanLiu2Intel Nov 21, 2025
b363ff6
Add compileWsOneShot and compileWsIterative for VCLCompilerImpl
WenjingKangIntel Nov 21, 2025
16d591c
clang-format
DanLiu2Intel Nov 21, 2025
5ba9f39
fix comments
DanLiu2Intel Nov 24, 2025
24e2935
Add OPENVINO_ASSERT for one shot weightless compilation to check init…
WenjingKangIntel Nov 24, 2025
7bb58a7
fix comments2
DanLiu2Intel Nov 24, 2025
32fa2f8
fix comments3 and format
DanLiu2Intel Nov 24, 2025
ac41769
update copyright to `2018-2025`
DanLiu2Intel Nov 25, 2025
e73a423
align try-catch in plugin adapter
DanLiu2Intel Nov 25, 2025
3e98d80
fix serializer config
DanLiu2Intel Nov 25, 2025
a9b799d
throw exception from create infer request part
DanLiu2Intel Nov 26, 2025
88ba42d
Merge branch 'master' into serialize-on-plugin-adapter
DanLiu2Intel Nov 27, 2025
050d511
update log
DanLiu2Intel Nov 27, 2025
0cde026
add query parse to serialize config + update log
DanLiu2Intel Nov 28, 2025
b2bb69d
remove old vcl version support
DanLiu2Intel Nov 28, 2025
11b537b
WS bug fixes
razvanapetroaie Nov 27, 2025
5580222
update clang format
DanLiu2Intel Nov 28, 2025
6b8015c
Fallback to weights copies on all unhandled cases
razvanapetroaie Nov 28, 2025
acd1fe3
use MLIR/PLUGIN compilerType and use base serialize method
DanLiu2Intel Nov 28, 2025
0f654c6
fix CheckOneShotVersionThrows issue
DanLiu2Intel Nov 28, 2025
c235786
remove ov cache check
DanLiu2Intel Nov 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ struct COMPILER_TYPE final : OptionBase<COMPILER_TYPE, ov::intel_npu::CompilerTy
}

// Returns the default value of the COMPILER_TYPE option.
static ov::intel_npu::CompilerType defaultValue() {
// NOTE(review): two consecutive return statements. As written, only the first one (DRIVER) executes and the
// PLUGIN line is unreachable. This looks like a rendered diff (removed line followed by the added line);
// confirm that PLUGIN is the intended new default and that the DRIVER line is gone in the actual file.
return ov::intel_npu::CompilerType::DRIVER;
return ov::intel_npu::CompilerType::PLUGIN;
}

static ov::intel_npu::CompilerType parse(std::string_view val) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
return _mutex;
}

/**
 * @brief Reports the current value of the `_init_completed` flag.
 */
bool init_completed() {
    const bool completed = _init_completed;
    return completed;
}

// Accessors for the last submitted event, addressed by command-list index. They are pure virtual, so every
// concrete graph has to provide them.
// NOTE(review): the exact semantics live in the implementations, which are not visible here.
virtual void set_last_submitted_event(const std::shared_ptr<Event>& event, size_t indexOfCommandList) = 0;
virtual const std::shared_ptr<Event>& get_last_submitted_event(size_t indexOfCommandList) const = 0;
// presumably resizes the per-command-list event storage to `batch` entries — TODO confirm
virtual void resize_last_submitted_event(size_t batch) = 0;
Expand All @@ -68,6 +72,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
// Used to protect zero pipeline creation in the graph. The pipeline should be created only once per graph when the
// first inference starts running
std::mutex _mutex;
// Flag returned by init_completed(); starts out false.
// NOTE(review): the code that sets it is not visible here — confirm where it is updated.
bool _init_completed = false;
};

} // namespace intel_npu
342 changes: 342 additions & 0 deletions src/plugins/intel_npu/src/compiler_adapter/include/compiler.h

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <optional>

#include "compiler.h"
#include "intel_npu/common/filtered_config.hpp"
#include "intel_npu/icompiler.hpp"
#include "openvino/core/except.hpp"

namespace intel_npu {

class VCLCompilerImpl final : public intel_npu::ICompiler {
public:
VCLCompilerImpl();
~VCLCompilerImpl() override;
static const std::shared_ptr<VCLCompilerImpl> getInstance();

NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
const Config& config) const override;

NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
const Config& config,
size_t callNumber) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const override;

uint32_t get_version() const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::vector<uint8_t>& network,
const intel_npu::Config& config) const final override;

bool get_supported_options(std::vector<char>& options) const;

bool is_option_supported(const std::string& option, std::optional<std::string> optValue = std::nullopt) const;

std::shared_ptr<void> getLinkedLibrary() const;

private:
vcl_log_handle_t _logHandle = nullptr;
vcl_compiler_handle_t _compilerHandle = nullptr;
vcl_compiler_properties_t _compilerProperties;
vcl_version_info_t _vclVersion;
vcl_version_info_t _vclProfilingVersion;
Logger _logger;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/network_metadata.hpp"
#include "openvino/core/model.hpp"

namespace intel_npu {

/**
 * @brief Predicate over a NetworkMetadata object.
 * @note NOTE(review): presumably tells whether the metadata describes the "init" part used by the weights
 * separation flow — confirm against the definition, which is not visible here.
 */
bool isInitMetadata(const NetworkMetadata& networkMetadata);

/**
* @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
* serialization.
* @details Constant nodes (weights) may contain as metadata the "WeightlessCacheAttribute", that is information
* regarding the offset of the weights within the binary file, as well as the original size and precision. This
* information is required within the "weights separation" flow, therefore this function is here to store it.
* @note Not calling this function in the weights separation flow would lead to this information being lost upon
* serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
* metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
* misinformed and lookups of weights offsets could fail.
*
* @param model Both source and target.
*/
void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model);
} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,23 @@ class XmlSerializer : public ov::util::XmlSerializer {
false,
ov::element::dynamic,
false),
m_base_constant_writer(std::ref(constant_write_handler)),
m_weightless_constant_writer(weightless_constant_writer
? weightless_constant_writer
: std::make_shared<WeightlessWriter>(constant_write_handler)) {}

private:
/**
* @brief Toggles between the two writers.
*/
ov::util::ConstantWriter& get_constant_write_handler() override;

/**
* @brief Overridden in order to choose which weights writer will be used based on the occurrence of the
* "WeightsPointerAttribute".
*/
bool append_node_attributes(ov::Node& node) override;

std::unique_ptr<ov::util::XmlSerializer> make_visitor(pugi::xml_node& data,
const std::string& node_type_name,
ov::util::ConstantWriter& constant_write_handler,
Expand All @@ -56,7 +66,15 @@ class XmlSerializer : public ov::util::XmlSerializer {
ov::element::Type,
bool) const override;

/**
* @brief The base OV writer, copies the weights in a dedicated buffer.
*
* @note Ideally, we would not require this writer at all. The current algorithm does not handle subgraphs properly,
* so falling back to copying a part of the weights is a temporary fix.
*/
std::reference_wrapper<ov::util::ConstantWriter> m_base_constant_writer;
/**
* @brief The weightless writer: the one handed to the constructor or, when none was given, a WeightlessWriter
* created from the same constant write handler.
*/
std::shared_ptr<WeightlessWriter> m_weightless_constant_writer = nullptr;
/**
* @brief Selects which of the two writers is in use; starts out false.
*
* @note NOTE(review): presumably updated by append_node_attributes() and read by get_constant_write_handler() —
* confirm in the definitions.
*/
bool m_use_weightless_writer = false;
};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,7 @@ class ZeGraphExtWrappers {
Logger _logger;
};

// Parses the result string of a query, formatted as <name_0><name_1><name_2>, into an unordered_set of strings.
// `data` holds the characters of that result string.
// NOTE(review): `data` is taken by non-const reference — confirm whether the implementation modifies it.
std::unordered_set<std::string> parseQueryResult(std::vector<char>& data);

} // namespace intel_npu
Loading