22 changes: 20 additions & 2 deletions src/core/dev_api/openvino/runtime/itensor.hpp
@@ -60,7 +60,15 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {

/**
* @brief Provides an access to the underlying host memory
* @param type Optional type parameter.
* @note The method throws an exception:
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
*/
virtual void* data_rw() = 0;

/**
* @brief Provides an access to the underlying host memory
* @param type Type parameter.
* @note The method throws an exception
* if specified type's fundamental type does not match with tensor element type's fundamental type
* @return A host pointer to tensor memory
@@ -70,10 +78,20 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {
virtual const void* data(const element::Type& type) const = 0;
/// @}

/**
* @brief Provides an access to the underlying host memory
* @param type Type parameter.
* @note The method throws an exception:
* - if specified type's fundamental type does not match with tensor element type's fundamental type
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
*/
virtual void* data_rw(const element::Type& type) = 0;

/**
* @brief Provides an access to the underlying host memory casted to type `T`
* @return A host pointer to tensor memory casted to specified type `T`.
* @note Throws exception if specified type does not match with tensor element type
* @return A host pointer to tensor memory casted to specified type `T`.
*/
template <typename T, typename datatype = typename std::decay<T>::type>
T* data() {
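Note: the new accessor pair splits read and write access on `ov::ITensor`. The `const` `data()` overloads remain usable on any tensor, while `data_rw()` is the only path that hands out a mutable host pointer and throws when the implementation forbids non-const access (read-only views, remote tensors). Below is a minimal caller-side sketch of that contract; `copy_tensor_data` is an illustrative name, not part of this diff.

```cpp
#include <cstring>

#include "openvino/core/except.hpp"
#include "openvino/runtime/itensor.hpp"

// Reads go through the const data() overload, which any tensor provides;
// writes go through data_rw(), which throws if `dst` does not allow
// non-const access to its memory (e.g. a read-only view or a remote tensor).
void copy_tensor_data(const ov::ITensor& src, ov::ITensor& dst) {
    OPENVINO_ASSERT(src.get_byte_size() == dst.get_byte_size(), "byte size mismatch");
    std::memcpy(dst.data_rw(), src.data(), src.get_byte_size());
}
```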
7 changes: 5 additions & 2 deletions src/core/include/openvino/runtime/tensor.hpp
@@ -219,6 +219,8 @@ class OPENVINO_API Tensor {

/**
* @brief Provides an access to the underlying host memory
* @note The method throws an exception:
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
* @{
*/
@@ -229,8 +231,9 @@
/**
* @brief Provides an access to the underlying host memory
* @param type Optional type parameter.
* @note The method throws an exception
* if specified type's fundamental type does not match with tensor element type's fundamental type
* @note The method throws an exception:
* - if specified type's fundamental type does not match with tensor element type's fundamental type
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
* @{
*/
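Note: the same contract surfaces on the public `ov::Tensor` — the `const` `data()` overloads always work for reading, while the non-const overloads (now forwarded to `data_rw()`, see tensor.cpp below) throw if the underlying tensor is read-only. A usage sketch, assuming the read-only view constructor over a `const` pointer that the updated tests further down rely on:

```cpp
#include <array>
#include <iostream>
#include <utility>

#include "openvino/core/except.hpp"
#include "openvino/runtime/tensor.hpp"

int main() {
    // Tensor that owns its memory: non-const access is allowed.
    ov::Tensor owned(ov::element::f32, ov::Shape{2, 2});
    owned.data<float>()[0] = 42.f;

    // Read-only view over external const data, as the updated tests create.
    const std::array<float, 4> external{1.f, 2.f, 3.f, 4.f};
    ov::Tensor view(ov::element::f32, ov::Shape{2, 2}, external.data());

    // Const access is always available...
    const float* ro = static_cast<const float*>(std::as_const(view).data());
    std::cout << ro[0] << "\n";

    // ...but requesting a mutable pointer from the read-only view throws.
    try {
        view.data();
    } catch (const ov::Exception& e) {
        std::cout << "non-const access rejected: " << e.what() << "\n";
    }
    return 0;
}
```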
26 changes: 13 additions & 13 deletions src/core/reference/src/op/einsum.cpp
@@ -580,9 +580,9 @@ void extract_diagonal(ov::TensorVector& inputs, std::vector<std::string>& input_
return;
}

ov::Tensor multi_identity = build_multi_identity<T>(input_ptr, repeated_labels, label_dim_map);
const ov::Tensor multi_identity = build_multi_identity<T>(input_ptr, repeated_labels, label_dim_map);

ov::Tensor mul_output = input_ptr;
auto mul_output = ov::Tensor(input_ptr.get_element_type(), input_ptr.get_shape());
reference::multiply<T>(input_ptr.data<T>(),
multi_identity.data<T>(),
mul_output.data<T>(),
@@ -926,17 +926,17 @@ void contract_two_inputs(ov::TensorVector& inputs,
reduced_sub_shape.get_shape(),
is_separate_first2);

ov::Tensor matmul_operand1 = reshape_input_for_matmul<T>(input1,
common_sub_shape,
separate1_sub_shape,
reduced_sub_shape_prod,
is_separate_first1);

ov::Tensor matmul_operand2 = reshape_input_for_matmul<T>(input2,
common_sub_shape,
separate2_sub_shape,
reduced_sub_shape_prod,
is_separate_first2);
const ov::Tensor matmul_operand1 = reshape_input_for_matmul<T>(input1,
common_sub_shape,
separate1_sub_shape,
reduced_sub_shape_prod,
is_separate_first1);

const ov::Tensor matmul_operand2 = reshape_input_for_matmul<T>(input2,
common_sub_shape,
separate2_sub_shape,
reduced_sub_shape_prod,
is_separate_first2);

// step 3. apply MatMul operation for formatted inputs
Shape matmul_output_shape = compute_matmul_output_shape(common_sub_shape, separate1_sub_shape, separate2_sub_shape);
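Note: `ov::Tensor mul_output = input_ptr;` only copied the handle — `ov::Tensor` is a shared, reference-counted wrapper — so the multiply used to write straight into the input's memory. With const-correct (possibly read-only) inputs that in-place write is no longer allowed, hence the freshly allocated output of the same element type and shape. A small sketch of the difference, with illustrative names:

```cpp
#include "openvino/runtime/tensor.hpp"

void alias_vs_allocate(const ov::Tensor& input) {
    // Copying the handle: `alias` shares memory with `input`, so writing through
    // it would mutate the (possibly read-only) input. This is what the old code did.
    ov::Tensor alias = input;

    // Allocating: a separate writable buffer of the same type and shape,
    // which is what the einsum reference now uses for the multiply result.
    ov::Tensor scratch(input.get_element_type(), input.get_shape());

    (void)alias;
    (void)scratch;
}
```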
19 changes: 10 additions & 9 deletions src/core/src/op/read_value.cpp
@@ -150,21 +150,22 @@ bool ReadValue::evaluate(TensorVector& outputs,

const auto use_context = var_value != variable_values.end() && !var_value->second->get_reset();
auto& output = outputs[0];
Tensor input;
if (use_context) {
input = var_value->second->get_state();
} else {
if (!inputs.empty()) {
input = inputs[0];
const auto& input = [&] {
if (use_context) {
return var_value->second->get_state();
} else if (!inputs.empty()) {
return inputs[0];
} else {
auto var_info = m_variable->get_info();
const auto var_info = m_variable->get_info();
OPENVINO_ASSERT(var_info.data_shape.is_static() && var_info.data_type.is_static());
const auto& shape = var_info.data_shape.get_shape();
const auto& type = var_info.data_type;
input = ov::Tensor(type, shape);
auto input = ov::Tensor(type, shape);
memset(input.data(), 0, input.get_byte_size());
return input;
}
}
}();

output.set_shape(input.get_shape());
std::memcpy(output.data(), input.data(), output.get_byte_size());
return true;
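Note: the rewrite replaces the default-constructed `Tensor input;` plus branchy assignments with an immediately-invoked lambda, so `input` is initialized exactly once from whichever source applies (variable state, the optional input, or a zero-filled fallback) and can stay bound as a const reference. The pattern in isolation, with illustrative names and shapes:

```cpp
#include <cstring>

#include "openvino/runtime/tensor.hpp"

ov::Tensor pick_source(bool use_existing, const ov::Tensor& existing) {
    // Immediately-invoked lambda: several possible sources, a single const binding.
    const auto& input = [&]() -> ov::Tensor {
        if (use_existing) {
            return existing;  // reuse the tensor that is already available
        }
        // Fallback: freshly allocated tensor, zero-filled because a new
        // ov::Tensor does not guarantee initialized memory.
        ov::Tensor zeroed(ov::element::f32, ov::Shape{2, 2});
        std::memset(zeroed.data(), 0, zeroed.get_byte_size());
        return zeroed;
    }();
    return input;
}
```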
4 changes: 2 additions & 2 deletions src/core/src/runtime/tensor.cpp
@@ -106,15 +106,15 @@ size_t Tensor::get_byte_size() const {
}

void* Tensor::data() {
OV_TENSOR_STATEMENT(return _impl->data());
OV_TENSOR_STATEMENT(return _impl->data_rw());
}

const void* Tensor::data() const {
OV_TENSOR_STATEMENT(return std::as_const(*_impl).data());
}

void* Tensor::data(const element::Type& element_type) {
OV_TENSOR_STATEMENT(return _impl->data(element_type));
OV_TENSOR_STATEMENT(return _impl->data_rw(element_type));
}

const void* Tensor::data(const element::Type& element_type) const {
8 changes: 8 additions & 0 deletions src/inference/dev_api/openvino/runtime/iremote_tensor.hpp
@@ -32,6 +32,14 @@ class OPENVINO_RUNTIME_API IRemoteTensor : public ITensor {
OPENVINO_NOT_IMPLEMENTED;
}

void* data_rw() override final {
OPENVINO_NOT_IMPLEMENTED;
}

void* data_rw(const element::Type&) override final {
OPENVINO_NOT_IMPLEMENTED;
}

~IRemoteTensor() override;

/**
8 changes: 8 additions & 0 deletions src/inference/dev_api/openvino/runtime/so_ptr.hpp
@@ -75,6 +75,14 @@ struct SoPtr {
return _ptr.get();
}

/**
* @brief Dereference stored pointer to T object.
* @return Reference to T object.
*/
T& operator*() const noexcept {
return *_ptr;
}

explicit operator bool() const noexcept {
return _ptr != nullptr;
}
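Note: `SoPtr::operator->` hands back a plain `T*`, so calls through it always resolve to the non-const overloads. The new `operator*` lets callers dereference the wrapped object directly and, for example, combine it with `std::as_const` when only read access is needed. A minimal sketch with an illustrative helper name:

```cpp
#include <utility>

#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"

// Read-only access through an SoPtr-wrapped tensor: dereference, then add const,
// so the call resolves to the const data() overload.
const void* read_only_ptr(const ov::SoPtr<ov::ITensor>& tensor) {
    return std::as_const(*tensor).data();
}
```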
2 changes: 1 addition & 1 deletion src/inference/src/dev/isync_infer_request.cpp
@@ -192,7 +192,7 @@ void ov::ISyncInferRequest::convert_batched_tensors() {
// Perform memory copy
ov::parallel_for(item.second.size(), [&](size_t i) {
const auto& tensor = item.second.at(i);
memcpy(ptr + i * tensor->get_byte_size(), static_cast<uint8_t*>(tensor->data()), tensor->get_byte_size());
memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
});
prepared_tensors[item.first] = input_tensor;
}
31 changes: 28 additions & 3 deletions src/inference/src/dev/make_tensor.cpp
@@ -84,6 +84,19 @@ class ViewTensor : public ITensor {
return m_ptr;
}

void* data_rw() override {
return m_ptr;
}

void* data_rw(const element::Type& element_type) override {
OPENVINO_ASSERT(is_pointer_representable(element_type),
"Tensor data with element type ",
get_element_type(),
", is not representable as pointer to ",
element_type);
return m_ptr;
}

const element::Type& get_element_type() const override {
return m_element_type;
}
@@ -157,11 +170,11 @@ class ReadOnlyViewTensor : public ViewTensor {

using ViewTensor::data;

[[noreturn]] void* data() override {
[[noreturn]] void* data_rw() override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}

[[noreturn]] void* data(const element::Type& element_type) override {
[[noreturn]] void* data_rw(const element::Type& element_type) override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data(element_type)'");
}
};
@@ -231,7 +244,11 @@ class ReadOnlyStridedViewTensor : public StridedViewTensor {

using StridedViewTensor::data;

[[noreturn]] void* data(const element::Type& element_type) override {
[[noreturn]] void* data_rw() override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}

[[noreturn]] void* data_rw(const element::Type& element_type) override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}
};
@@ -463,6 +480,14 @@ class RoiTensor : public BaseRoiTensor, public ITensor {
const void* data(const element::Type& element_type) const override {
return static_cast<uint8_t*>(m_owner->data()) + m_offset;
}

void* data_rw() override {
return static_cast<uint8_t*>(m_owner->data_rw()) + m_offset;
}

void* data_rw(const element::Type& element_type) override {
return static_cast<uint8_t*>(m_owner->data_rw(element_type)) + m_offset;
}
};

/**
13 changes: 13 additions & 0 deletions src/plugins/intel_cpu/src/cpu_tensor.cpp
@@ -125,6 +125,19 @@ const void* Tensor::data(const element::Type& element_type) const {
return m_memptr->getData();
}

void* Tensor::data_rw() {
return m_memptr->getData();
}

void* Tensor::data_rw(const element::Type& element_type) {
OPENVINO_ASSERT(is_pointer_representable(get_element_type(), element_type),
"Tensor data with element type ",
get_element_type(),
", is not representable as pointer to ",
element_type);
return m_memptr->getData();
}

/**
* @brief Creates tensor on graph memory
*
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/cpu_tensor.h
@@ -37,6 +37,8 @@ class Tensor : public ITensor {
void* data(const element::Type& type) override;
const void* data() const override;
const void* data(const element::Type& type) const override;
void* data_rw() override;
void* data_rw(const element::Type& type) override;

MemoryPtr get_memory() {
return m_memptr;
@@ -126,9 +126,10 @@ static std::string getTestCaseName(const testing::TestParamInfo<LoraPatternParam
ASSERT_TRUE(inferRequestRef);

generate_inputs(targetStaticShapes.front());
for (const auto& input : inputs) {
inferRequest.set_tensor(input.first, input.second);
inferRequestRef.set_tensor(input.first, input.second);
for (const auto& [port, tensor] : inputs) {
// Use read-only tensors as inputs, created from `const void*`
inferRequest.set_tensor(port, {tensor.get_element_type(), tensor.get_shape(), tensor.data()});
inferRequestRef.set_tensor(port, {tensor.get_element_type(), tensor.get_shape(), tensor.data()});
}

constexpr size_t lora_order = 25lu;
@@ -22,6 +22,8 @@ class USMHostTensor : public ov::ITensor {

void* data() override;
void* data(const element::Type& element_type) override;
void* data_rw() override;
void* data_rw(const element::Type& element_type) override;
const void* data() const override;
const void* data(const element::Type& element_type) const override;

20 changes: 10 additions & 10 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -533,8 +533,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe

// Note: currently, using USM Host memory for dGPUs in some scenarios (LLMs) leads to performance degradation,
// so apply wider USM Host memory type detection only for iGPUs
auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data())
: cldnn::allocation_type::unknown;
auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data()) : cldnn::allocation_type::unknown;
auto usm_host_raw_ptr = engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu &&
user_tensor_mem_type == cldnn::allocation_type::usm_host;

@@ -709,7 +708,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_batched_input(size_t in
auto ptr = static_cast<uint8_t*>(merged_tensor->data());
ov::parallel_for(user_tensors.size(), [&](size_t i) {
const auto& tensor = user_tensors.at(i);
std::memcpy(ptr + i * tensor->get_byte_size(), static_cast<uint8_t*>(tensor->data()), tensor->get_byte_size());
std::memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
});
} else {
const auto& stream = m_graph->get_network()->get_stream();
@@ -788,11 +787,12 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
}
} else if (is_usm_host_tensor && !convert_needed) {
if (element_type != ::data_type_for_remote_tensor(element_type)) {
m_plugin_inputs[input_idx] = { std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()), TensorOwner::USER };
m_plugin_inputs[input_idx] = {std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()),
TensorOwner::USER};
} else {
m_plugin_inputs[input_idx] = { usm_host_ptr->get_impl(), user_tensor_wrapper.owner };
}
@@ -801,7 +801,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string

auto user_tensor_mem_type = cldnn::allocation_type::unknown;
if (!is_remote_tensor_impl && !is_generic_remote) {
user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor_wrapper.ptr->data());
user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor->data());
}

auto plugin_tensor_mem_type = cldnn::allocation_type::unknown;
@@ -876,7 +876,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
}
} else {
if (!is_remote_tensor_impl && !is_generic_remote) {
auto src_ptr = static_cast<uint8_t*>(user_tensor->data());
auto src_ptr = static_cast<const uint8_t*>(user_tensor->data());
if (!same_host_mem(memory, src_ptr)) {
// WA: Set need_lockable_mem as a blocking argument
// The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations
8 changes: 8 additions & 0 deletions src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp
@@ -23,6 +23,14 @@ void* USMHostTensor::data(const element::Type&) {
return m_impl->get_original_memory_buf_ptr();
}

void* USMHostTensor::data_rw() {
return m_impl->get_original_memory_buf_ptr();
}

void* USMHostTensor::data_rw(const element::Type&) {
return m_impl->get_original_memory_buf_ptr();
}

const void* USMHostTensor::data() const {
return m_impl->get_original_memory_buf_ptr();
}
@@ -557,7 +557,8 @@ class KVCacheIssueTests: public ::testing::Test {
{n_batch, context_size, n_heads, n_features}, -0.5f, 0.5f, 1);
auto ireq1_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
{n_batch, n_heads, context_size, context_size}, -0.5f, 0.5f, 1);
ireq1.set_tensor(input0, ireq1_input0);
// Create read-only tensor view to test inference with this const input data
ireq1.set_tensor(input0, {ireq1_input0.get_element_type(), ireq1_input0.get_shape(), std::as_const(ireq1_input0).data()});
ireq1.set_tensor(input1, ireq1_input1);

auto ireq2_input0 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
3 changes: 3 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
@@ -52,6 +52,9 @@ class ZeroTensor final : public ov::ITensor {
void* data() override;
void* data(const ov::element::Type& type) override;

void* data_rw() override;
void* data_rw(const ov::element::Type& type) override;

const void* data() const override;
const void* data(const ov::element::Type& type) const override;
