Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,26 @@ struct DISABLE_VERSION_CHECK final : OptionBase<DISABLE_VERSION_CHECK, bool> {
}
};

struct EXPORT_RAW_BLOB final : OptionBase<EXPORT_RAW_BLOB, bool> {
    // Property key identifying this option ("NPU_EXPORT_RAW_BLOB").
    static std::string_view key() {
        return ov::intel_npu::export_raw_blob.name();
    }

    // Raw-blob export is opt-in: by default the plugin keeps writing its metadata.
    static bool defaultValue() {
        return false;
    }

    // Evaluated at run time rather than at compile time.
    static OptionMode mode() {
        return OptionMode::RunTime;
    }

#ifdef NPU_PLUGIN_DEVELOPER_BUILD
    // Developer builds may also toggle this option through an environment variable.
    static std::string_view envVar() {
        return "OV_NPU_EXPORT_RAW_BLOB";
    }
#endif
};

struct BATCH_COMPILER_MODE_SETTINGS final : OptionBase<BATCH_COMPILER_MODE_SETTINGS, std::string> {
static std::string_view key() {
return ov::intel_npu::batch_compiler_mode_settings.name();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,5 +444,12 @@ static constexpr ov::Property<std::string> backend_compilation_params{"NPU_BACKE
*/
static constexpr ov::Property<bool> disable_version_check{"NPU_DISABLE_VERSION_CHECK"};

/**
* @brief [Only for NPU Plugin]
* Type: boolean, default is false.
 * This option makes it possible to skip writing plugin metadata to the compiled model when exporting it.
*/
static constexpr ov::Property<bool> export_raw_blob{"NPU_EXPORT_RAW_BLOB"};

} // namespace intel_npu
} // namespace ov
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/common/src/filtered_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ bool FilteredConfig::isAvailable(std::string key) const {
if (it != _enabled.end() && hasOpt(key)) {
return it->second;
}
// if doesnt exist = not available
// if doesn't exist = not available
return false;
}

Expand Down
30 changes: 13 additions & 17 deletions src/plugins/intel_npu/src/plugin/include/metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace intel_npu {

class MetadataBase {
public:
MetadataBase(uint32_t version, uint64_t blobDataSize);
MetadataBase(uint32_t version);

using uninitialized_source = void*;
using Source = std::
Expand All @@ -34,7 +34,7 @@ class MetadataBase {
/**
* @brief Reads metadata from a ov::Tensor.
*/
void read(const ov::Tensor& tensor);
void read(const ov::Tensor& tensor, size_t offset = 0);

virtual void read() = 0;

Expand All @@ -45,7 +45,7 @@ class MetadataBase {

virtual bool is_compatible() = 0;

virtual uint64_t get_blob_size() const;
uint64_t get_blob_offset() const;

/**
* @returns The sizes of the init schedules. Populated only if "weights separation" has been enabled.
Expand Down Expand Up @@ -108,10 +108,9 @@ class MetadataBase {
* @note This operation was detached from "write" since "write" writes at the beginning of the stream, while this
* method writes at the end. This change allows better extension of class hierarchy.
*/
void append_padding_blob_size_and_magic(std::ostream& stream);
void append_padding(std::ostream& stream);

uint32_t _version;
uint64_t _blobDataSize;
Logger _logger;

/**
Expand All @@ -138,12 +137,12 @@ constexpr std::string_view MAGIC_BYTES = "OVNPU";
constexpr uint32_t METADATA_VERSION_2_0{MetadataBase::make_version(2, 0)};
constexpr uint32_t METADATA_VERSION_2_1{MetadataBase::make_version(2, 1)};
constexpr uint32_t METADATA_VERSION_2_2{MetadataBase::make_version(2, 2)};
constexpr uint32_t METADATA_VERSION_2_3{MetadataBase::make_version(2, 3)};
constexpr uint32_t METADATA_VERSION_3_0{MetadataBase::make_version(3, 0)};

/**
* @brief Current metadata version.
*/
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_3};
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_3_0};

constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)};
constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)};
Expand Down Expand Up @@ -177,7 +176,7 @@ class OpenvinoVersion final {
/**
* @brief Reads version data from a ov::Tensor.
*/
void read(const ov::Tensor& tensor);
void read(const ov::Tensor& tensor, size_t offset = 0);

/**
* @brief Writes version data to a stream.
Expand Down Expand Up @@ -216,7 +215,7 @@ struct Metadata : public MetadataBase {};
template <>
class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
public:
Metadata(uint64_t blobSize, const std::optional<OpenvinoVersion>& ovVersion = std::nullopt);
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt);

void read() override;

Expand Down Expand Up @@ -256,8 +255,7 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
template <>
class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
public:
Metadata(uint64_t blobSize,
const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>>& initSizes = std::nullopt);

/**
Expand Down Expand Up @@ -287,8 +285,7 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
template <>
class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
public:
Metadata(uint64_t blobSize,
std::optional<OpenvinoVersion> ovVersion = std::nullopt,
Metadata(std::optional<OpenvinoVersion> ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>> initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt);

Expand All @@ -309,10 +306,9 @@ class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
* @details The order used for recording the layouts follows the deterministic order in which OV parses the I/O.
*/
template <>
class Metadata<METADATA_VERSION_2_3> : public Metadata<METADATA_VERSION_2_2> {
class Metadata<METADATA_VERSION_3_0> : public Metadata<METADATA_VERSION_2_2> {
public:
Metadata(uint64_t blobSize,
const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>>& initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt,
const std::optional<std::vector<ov::Layout>>& inputLayouts = std::nullopt,
Expand All @@ -339,7 +335,7 @@ class Metadata<METADATA_VERSION_2_3> : public Metadata<METADATA_VERSION_2_2> {
* @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns
* 'nullptr'.
*/
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, uint64_t blobSize);
std::unique_ptr<MetadataBase> create_metadata(uint32_t version);

/**
* @brief Reads metadata from a blob (istream).
Expand Down
42 changes: 24 additions & 18 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,27 +84,33 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");

auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);
std::stringstream blobStream;
auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(blobStream);

std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();
bool shouldExportRawBlobOnly =
(_config.isAvailable(ov::intel_npu::export_raw_blob.name()) && _config.get<EXPORT_RAW_BLOB>() == true);

for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}
if (!shouldExportRawBlobOnly) {
std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();

for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
initBlobSizes,
_batchSize,
inputLayouts,
outputLayouts)
.write(stream);
Metadata<CURRENT_METADATA_VERSION>(CURRENT_OPENVINO_VERSION,
initBlobSizes,
_batchSize,
inputLayouts,
outputLayouts)
.write(stream);
}
stream << blobStream.rdbuf();
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
Loading
Loading