Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,26 @@ struct DISABLE_VERSION_CHECK final : OptionBase<DISABLE_VERSION_CHECK, bool> {
}
};

struct EXPORT_RAW_BLOB final : OptionBase<EXPORT_RAW_BLOB, bool> {
    // Property key identifying this option ("NPU_EXPORT_RAW_BLOB").
    static std::string_view key() {
        return ov::intel_npu::export_raw_blob.name();
    }

    // Raw-blob export is opt-in: by default the plugin keeps writing its metadata.
    static bool defaultValue() {
        return false;
    }

    // Evaluated at run time rather than at compile time.
    static OptionMode mode() {
        return OptionMode::RunTime;
    }

#ifdef NPU_PLUGIN_DEVELOPER_BUILD
    // Developer builds may also toggle this option through an environment variable.
    static std::string_view envVar() {
        return "OV_NPU_EXPORT_RAW_BLOB";
    }
#endif
};

struct BATCH_COMPILER_MODE_SETTINGS final : OptionBase<BATCH_COMPILER_MODE_SETTINGS, std::string> {
static std::string_view key() {
return ov::intel_npu::batch_compiler_mode_settings.name();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,5 +444,12 @@ static constexpr ov::Property<std::string> backend_compilation_params{"NPU_BACKE
*/
static constexpr ov::Property<bool> disable_version_check{"NPU_DISABLE_VERSION_CHECK"};

/**
* @brief [Only for NPU Plugin]
* Type: boolean, default is false.
 * This option makes it possible to skip writing plugin metadata to the compiled model when exporting it.
*/
static constexpr ov::Property<bool> export_raw_blob{"NPU_EXPORT_RAW_BLOB"};

} // namespace intel_npu
} // namespace ov
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/common/src/filtered_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ bool FilteredConfig::isAvailable(std::string key) const {
if (it != _enabled.end() && hasOpt(key)) {
return it->second;
}
// if doesnt exist = not available
// if doesn't exist = not available
return false;
}

Expand Down
30 changes: 13 additions & 17 deletions src/plugins/intel_npu/src/plugin/include/metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace intel_npu {

class MetadataBase {
public:
MetadataBase(uint32_t version, uint64_t blobDataSize);
MetadataBase(uint32_t version);

using uninitialized_source = void*;
using Source = std::
Expand All @@ -34,7 +34,7 @@ class MetadataBase {
/**
* @brief Reads metadata from a ov::Tensor.
*/
void read(const ov::Tensor& tensor);
void read(const ov::Tensor& tensor, size_t offset = 0);

virtual void read() = 0;

Expand All @@ -45,7 +45,7 @@ class MetadataBase {

virtual bool is_compatible() = 0;

virtual uint64_t get_blob_size() const;
uint64_t get_blob_offset() const;

/**
* @returns The sizes of the init schedules. Populated only if "weights separation" has been enabled.
Expand Down Expand Up @@ -108,10 +108,9 @@ class MetadataBase {
* @note This operation was detached from "write" since "write" writes at the beginning of the stream, while this
* method writes at the end. This change allows better extension of class hierarchy.
*/
void append_padding_blob_size_and_magic(std::ostream& stream);
void append_padding(std::ostream& stream);

uint32_t _version;
uint64_t _blobDataSize;
Logger _logger;

/**
Expand All @@ -138,12 +137,12 @@ constexpr std::string_view MAGIC_BYTES = "OVNPU";
constexpr uint32_t METADATA_VERSION_2_0{MetadataBase::make_version(2, 0)};
constexpr uint32_t METADATA_VERSION_2_1{MetadataBase::make_version(2, 1)};
constexpr uint32_t METADATA_VERSION_2_2{MetadataBase::make_version(2, 2)};
constexpr uint32_t METADATA_VERSION_2_3{MetadataBase::make_version(2, 3)};
constexpr uint32_t METADATA_VERSION_3_0{MetadataBase::make_version(3, 0)};

/**
* @brief Current metadata version.
*/
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_3};
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_3_0};

constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)};
constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)};
Expand Down Expand Up @@ -177,7 +176,7 @@ class OpenvinoVersion final {
/**
* @brief Reads version data from a ov::Tensor.
*/
void read(const ov::Tensor& tensor);
void read(const ov::Tensor& tensor, size_t offset = 0);

/**
* @brief Writes version data to a stream.
Expand Down Expand Up @@ -216,7 +215,7 @@ struct Metadata : public MetadataBase {};
template <>
class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
public:
Metadata(uint64_t blobSize, const std::optional<OpenvinoVersion>& ovVersion = std::nullopt);
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt);

void read() override;

Expand Down Expand Up @@ -256,8 +255,7 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
template <>
class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
public:
Metadata(uint64_t blobSize,
const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>>& initSizes = std::nullopt);

/**
Expand Down Expand Up @@ -287,8 +285,7 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
template <>
class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
public:
Metadata(uint64_t blobSize,
std::optional<OpenvinoVersion> ovVersion = std::nullopt,
Metadata(std::optional<OpenvinoVersion> ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>> initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt);

Expand All @@ -309,10 +306,9 @@ class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
* @details The order used for recording the layouts follows the deterministic order in which OV parses the I/O.
*/
template <>
class Metadata<METADATA_VERSION_2_3> : public Metadata<METADATA_VERSION_2_2> {
class Metadata<METADATA_VERSION_3_0> : public Metadata<METADATA_VERSION_2_2> {
public:
Metadata(uint64_t blobSize,
const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
Metadata(const std::optional<OpenvinoVersion>& ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>>& initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt,
const std::optional<std::vector<ov::Layout>>& inputLayouts = std::nullopt,
Expand All @@ -339,7 +335,7 @@ class Metadata<METADATA_VERSION_2_3> : public Metadata<METADATA_VERSION_2_2> {
* @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns
* 'nullptr'.
*/
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, uint64_t blobSize);
std::unique_ptr<MetadataBase> create_metadata(uint32_t version);

/**
* @brief Reads metadata from a blob (istream).
Expand Down
42 changes: 24 additions & 18 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,27 +84,33 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");

auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);
std::stringstream blobStream;
auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(blobStream);

std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();
bool shouldExportRawBlobOnly =
(_config.isAvailable(ov::intel_npu::export_raw_blob.name()) && _config.get<EXPORT_RAW_BLOB>() == true);

for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}
if (!shouldExportRawBlobOnly) {
std::optional<std::vector<ov::Layout>> inputLayouts = std::vector<ov::Layout>();
std::optional<std::vector<ov::Layout>> outputLayouts = std::vector<ov::Layout>();

for (const ov::Output<const ov::Node>& nodeOutput : inputs()) {
inputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Parameter>(nodeOutput.get_node_shared_ptr())->get_layout());
}
for (const ov::Output<const ov::Node>& nodeOutput : outputs()) {
outputLayouts->push_back(
std::dynamic_pointer_cast<const ov::op::v0::Result>(nodeOutput.get_node_shared_ptr())->get_layout());
}

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
initBlobSizes,
_batchSize,
inputLayouts,
outputLayouts)
.write(stream);
Metadata<CURRENT_METADATA_VERSION>(CURRENT_OPENVINO_VERSION,
initBlobSizes,
_batchSize,
inputLayouts,
outputLayouts)
.write(stream);
}
stream << blobStream.rdbuf();
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
Loading
Loading