From e80eaa76e48eb33a3e177a368e4722251577d7d4 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 07:05:50 +0000 Subject: [PATCH 01/14] Add dereference operator to ov::SoPtr Signed-off-by: Pawel Raasz --- src/inference/dev_api/openvino/runtime/so_ptr.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/inference/dev_api/openvino/runtime/so_ptr.hpp b/src/inference/dev_api/openvino/runtime/so_ptr.hpp index f1e3937743e311..dcd3952a281769 100644 --- a/src/inference/dev_api/openvino/runtime/so_ptr.hpp +++ b/src/inference/dev_api/openvino/runtime/so_ptr.hpp @@ -75,6 +75,14 @@ struct SoPtr { return _ptr.get(); } + /** + * @brief Dereference stored pointer to T object. + * @return Reference to T object. + */ + T& operator*() const noexcept { + return *_ptr; + } + explicit operator bool() const noexcept { return _ptr != nullptr; } From 23d2b62d39a89499d5a6aedcf759b8e03488796e Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 07:07:27 +0000 Subject: [PATCH 02/14] Fix using data() for model inputs in core Signed-off-by: Pawel Raasz --- src/inference/src/dev/isync_infer_request.cpp | 8 +++--- .../ov_infer_request/batched_tensors.cpp | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index e6a89dc81c1874..55cd783c587ee1 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -192,7 +192,7 @@ void ov::ISyncInferRequest::convert_batched_tensors() { // Perform memory copy ov::parallel_for(item.second.size(), [&](size_t i) { const auto& tensor = item.second.at(i); - memcpy(ptr + i * tensor->get_byte_size(), static_cast(tensor->data()), tensor->get_byte_size()); + memcpy(ptr + i * tensor->get_byte_size(), std::as_const(*tensor).data(), tensor->get_byte_size()); }); prepared_tensors[item.first] = input_tensor; } @@ -285,9 +285,9 @@ void ov::ISyncInferRequest::check_tensor(const ov::Output& port, " expecting ", port.get_shape(), "."); - OPENVINO_ASSERT( - std::dynamic_pointer_cast(tensor._ptr) || tensor->data() != nullptr || is_dynamic, - "Tensor data equal nullptr!"); + OPENVINO_ASSERT(std::dynamic_pointer_cast(tensor._ptr) || + std::as_const(*tensor).data() != nullptr || is_dynamic, + "Tensor data equal nullptr!"); } void ov::ISyncInferRequest::allocate_tensor( diff --git a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp index 4047d180999881..2cf9b8d9cb20d5 100644 --- a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp +++ b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp @@ -104,6 +104,34 @@ TEST_P(OVInferRequestBatchedTests, SetInputTensorsBase) { } } +TEST_P(OVInferRequestBatchedTests, SetReadOnlyInputTensorsBase) { + size_t batch = 4; + auto one_shape = Shape{1, 2, 2, 2}; + auto batch_shape = Shape{batch, 2, 2, 2}; + auto one_shape_size = ov::shape_size(one_shape); + auto model = OVInferRequestBatchedTests::create_n_inputs(2, element::f32, batch_shape, "N..."); + // Allocate 8 chunks, set 'user tensors' to 0, 2, 4, 6 chunks + const std::vector buffer(one_shape_size * batch * 2, 5.f); + auto execNet = ie->compile_model(model, target_device); + // Create InferRequest + ov::InferRequest req; + req = execNet.create_infer_request(); + std::vector tensors; + auto exp_tensor = 
ov::Tensor(element::f32, batch_shape); + for (auto i = 0; i < batch; ++i) { + // non contiguous memory (i*2) + auto tensor = ov::Tensor(element::f32, one_shape, &std::as_const(buffer)[(i * 2) * one_shape_size]); + tensors.push_back(std::move(tensor)); + } + req.set_tensors("tensor_input0", tensors); + const auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + req.infer(); // Adds '1' to each element + for (auto j = 0; j < one_shape_size * batch; ++j) { + EXPECT_NEAR(actual[j], 6.f, 1e-5) << "Expected=6, actual=" << actual[j] << " for index " << j; + } +} + TEST_P(OVInferRequestBatchedTests, SetInputTensorsAsync) { size_t batch = 4; auto one_shape = Shape{1, 2, 2, 2}; From bc584265e64f228efc23e18de543597c8d0c13f9 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 07:14:47 +0000 Subject: [PATCH 03/14] Correct using data() for model inputs in CPU and TEMPLATE plugins Signed-off-by: Pawel Raasz --- src/plugins/intel_cpu/src/graph.cpp | 2 +- src/plugins/intel_cpu/src/infer_request.cpp | 22 +++++++++++-------- .../src/common/lora_pattern.cpp | 6 ++--- src/plugins/template/backend/backend.hpp | 6 +++++ src/plugins/template/backend/int_backend.cpp | 6 +++++ src/plugins/template/backend/int_backend.hpp | 1 + .../template/src/sync_infer_request.cpp | 8 +++---- 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 337ff590e852b8..adefd70b0d4c55 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1220,7 +1220,7 @@ void Graph::PushInputData(const std::size_t& index, const ov::SoPtr& in auto childEdge = node->getChildEdgeAt(0); const auto& edgeMemory = childEdge->getMemory(); - const void* ext_data_ptr = input->data(); + const void* ext_data_ptr = std::as_const(*input).data(); void* inter_data_ptr = edgeMemory.getData(); if (ext_data_ptr != inter_data_ptr) { diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 2556c9da699a6f..db5a4d90de0877 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -170,7 +170,8 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& } else { auto memBlock = mem->getMemoryBlock(); OPENVINO_ASSERT(memBlock); - memBlock->setExtBuff(tensor->data(), tensor->get_byte_size()); + // Use const cast as as `MemoryBlockPtr` not supports const pointers. The model inputs may have const pointers. 
+ memBlock->setExtBuff(const_cast(std::as_const(*tensor).data()), tensor->get_byte_size()); } } @@ -180,7 +181,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { if (graph.IsDynamic()) { changeInpPtr = [&inputPtrs](const EdgePtr& edge, ov::SoPtr& tensor) { change_edge_ptr(edge, tensor); - inputPtrs.insert(tensor->data()); + inputPtrs.insert(std::as_const(*tensor._ptr).data()); }; } else { changeInpPtr = [](const EdgePtr& edge, ov::SoPtr& tensor) { @@ -188,10 +189,10 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { }; } - for (auto& it : m_input_external_ptr) { - auto inputNodePtr = graph.getInputNodeByIndex(it.first); - OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", it.first); - if (inputNodePtr->getDstDataAtPort(0) == it.second->data()) { + for (auto& [idx, tensor] : m_input_external_ptr) { + auto inputNodePtr = graph.getInputNodeByIndex(idx); + OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", idx); + if (inputNodePtr->getDstDataAtPort(0) == std::as_const(*tensor).data()) { continue; } const auto& childEdges = inputNodePtr->getChildEdges(); @@ -230,7 +231,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { if (!e) { OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); } - changeInpPtr(e, it.second); + changeInpPtr(e, tensor); } } } @@ -364,15 +365,18 @@ void SyncInferRequest::set_tensor(const ov::Output& in_port, con OPENVINO_ASSERT(in_tensor, "Failed to set empty tensor for port!"); auto port = get_internal_port(in_port); auto tensor = in_tensor; + auto port_found = find_port(in_port); // WA: legacy api create blob with ANY layout will not set BlockingDesc, which will lead to tensor.get_shape() // return empty shape but tensor.get_size() return correct value, and tensor.reshape() cannot update // BlockingDesc, so to construct new tensor with original tensor's data, which is only for ov legacy api usage. if (in_port.get_partial_shape().is_static() && in_tensor->get_size() > 0 && in_tensor->get_shape().empty() && in_tensor->get_size() == ov::shape_size(in_port.get_shape()) && !in_port.get_shape().empty()) { - tensor = ov::make_tensor(in_tensor->get_element_type(), in_port.get_shape(), in_tensor->data()); + tensor = ov::make_tensor(in_tensor->get_element_type(), + in_port.get_shape(), + (port_found.is_input() ? 
std::as_const(*in_tensor).data() : in_tensor->data())); } - auto port_found = find_port(in_port); + auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor); if (port_found.is_input()) { auto input_index = port_found.idx; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp index 2e5782fb008bb5..c16e0cc306bdd8 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp @@ -126,9 +126,9 @@ static std::string getTestCaseName(const testing::TestParamInfo ov::runtime::interpreter::INTBackend::compile( std::shared_ptr model) { return std::make_shared(model); diff --git a/src/plugins/template/backend/int_backend.hpp b/src/plugins/template/backend/int_backend.hpp index 942ba0bb8d11d1..e728e7a96ba593 100644 --- a/src/plugins/template/backend/int_backend.hpp +++ b/src/plugins/template/backend/int_backend.hpp @@ -28,6 +28,7 @@ class INTBackend : public Backend { ov::Tensor create_tensor() override; ov::Tensor create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override; + ov::Tensor create_tensor(const element::Type& type, const Shape& shape, const void* memory_pointer) override; ov::Tensor create_tensor(const element::Type& type, const Shape& shape) override; diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index 938a79c9c6a2cd..0470e0179eb929 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -182,10 +182,10 @@ void ov::template_plugin::InferRequest::infer_preprocess() { data); } else if (tensor->is_continuous()) { // No ROI extraction is needed - m_backend_input_tensors[i] = - get_template_model()->get_template_plugin()->m_backend->create_tensor(tensor->get_element_type(), - tensor->get_shape(), - tensor->data()); + m_backend_input_tensors[i] = get_template_model()->get_template_plugin()->m_backend->create_tensor( + tensor->get_element_type(), + tensor->get_shape(), + std::as_const(*tensor).data()); } else { OPENVINO_ASSERT(tensor->get_element_type().bitwidth() % 8 == 0, "Template plugin: Unsupported ROI tensor with element type having ", From 677ed0dea6c830532bc3033780a8be2ac2a968ee Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 07:15:50 +0000 Subject: [PATCH 04/14] Correct use data() for tensors in GPU plugin Signed-off-by: Pawel Raasz --- .../src/plugin/sync_infer_request.cpp | 21 +++++++++++-------- .../subgraph_tests/dynamic/kv_cache.cpp | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 42cccece3855c2..4988dcbf9f9cbb 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -533,7 +533,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe // Note: currently, using USM Host memory for dGPUs in some scenarios (LLMs) leads to performance degradation, // so apply wider USM Host memory type detection only for iGPUs - auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data()) + auto user_tensor_mem_type = !generic_remote_tensor ? 
engine.detect_usm_allocation_type(std::as_const(*user_tensor).data()) : cldnn::allocation_type::unknown; auto usm_host_raw_ptr = engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu && user_tensor_mem_type == cldnn::allocation_type::usm_host; @@ -709,7 +709,7 @@ std::vector SyncInferRequest::prepare_batched_input(size_t in auto ptr = static_cast(merged_tensor->data()); ov::parallel_for(user_tensors.size(), [&](size_t i) { const auto& tensor = user_tensors.at(i); - std::memcpy(ptr + i * tensor->get_byte_size(), static_cast(tensor->data()), tensor->get_byte_size()); + std::memcpy(ptr + i * tensor->get_byte_size(), std::as_const(*tensor).data(), tensor->get_byte_size()); }); } else { const auto& stream = m_graph->get_network()->get_stream(); @@ -788,11 +788,14 @@ std::vector SyncInferRequest::prepare_input(const std::string } } else if (is_usm_host_tensor && !convert_needed) { if (element_type != ::data_type_for_remote_tensor(element_type)) { - m_plugin_inputs[input_idx] = { std::make_shared(m_context, - user_tensor->get_shape(), - ::data_type_for_remote_tensor(element_type), - TensorType::BT_USM_SHARED, - user_tensor->data()), TensorOwner::USER }; + // use const_cast RemoteTensorImpl dont accept const data ptr + // The model inputs can be read-only and const pointer should be used + m_plugin_inputs[input_idx] = {std::make_shared(m_context, + user_tensor->get_shape(), + ::data_type_for_remote_tensor(element_type), + TensorType::BT_USM_SHARED, + const_cast(std::as_const(*user_tensor).data())), + TensorOwner::USER}; } else { m_plugin_inputs[input_idx] = { usm_host_ptr->get_impl(), user_tensor_wrapper.owner }; } @@ -801,7 +804,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto user_tensor_mem_type = cldnn::allocation_type::unknown; if (!is_remote_tensor_impl && !is_generic_remote) { - user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor_wrapper.ptr->data()); + user_tensor_mem_type = engine.detect_usm_allocation_type(std::as_const(*user_tensor_wrapper.ptr).data()); } auto plugin_tensor_mem_type = cldnn::allocation_type::unknown; @@ -876,7 +879,7 @@ std::vector SyncInferRequest::prepare_input(const std::string } } else { if (!is_remote_tensor_impl && !is_generic_remote) { - auto src_ptr = static_cast(user_tensor->data()); + auto src_ptr = static_cast(std::as_const(*user_tensor).data()); if (!same_host_mem(memory, src_ptr)) { // WA: Set need_lockable_mem as a blocking argument // The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp index d5aa024dd31ebf..9b260aa5f723f2 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -557,7 +557,8 @@ class KVCacheIssueTests: public ::testing::Test { {n_batch, context_size, n_heads, n_features}, -0.5f, 0.5f, 1); auto ireq1_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type, {n_batch, n_heads, context_size, context_size}, -0.5f, 0.5f, 1); - ireq1.set_tensor(input0, ireq1_input0); + // Create read-only tensor view to test inference with this const input data + ireq1.set_tensor(input0, {ireq1_input0.get_element_type(), ireq1_input0.get_shape(), std::as_const(ireq1_input0).data()}); ireq1.set_tensor(input1, ireq1_input1); auto ireq2_input0 = 
ov::test::utils::create_and_fill_tensor_real_distribution(element_type, From a35b0d8e05f64eca0ce35cdde55e0acaf08ad3eb Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 07:16:17 +0000 Subject: [PATCH 05/14] Correct use data() for model inputs tensors in NPU plugin Signed-off-by: Pawel Raasz --- src/plugins/intel_npu/src/common/src/sync_infer_request.cpp | 6 +++--- .../behavior/ov_infer_request/infer_request_dynamic.hpp | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index d3eed4e7357005..dbefc8bfa64096 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -224,9 +224,9 @@ void SyncInferRequest::check_tensor(const ov::Output& port, " expecting ", port.get_shape(), "."); - OPENVINO_ASSERT( - std::dynamic_pointer_cast(tensor._ptr) || tensor->data() != nullptr || is_dynamic, - "Tensor data equal nullptr!"); + OPENVINO_ASSERT(std::dynamic_pointer_cast(tensor._ptr) || + std::as_const(*tensor).data() != nullptr || is_dynamic, + "Tensor data equal nullptr!"); } void SyncInferRequest::check_batched_tensors(const ov::Output& port, diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp index 3ab240e334162a..392b5fc923ede4 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp @@ -166,7 +166,9 @@ TEST_P(InferRequestDynamicTests, InferDynamicNetworkSetShapeCPUTensor) { auto inputTensor = ov::test::utils::create_and_fill_tensor(ov::element::f32, originalShape, 100, 0); OV_ASSERT_NO_THROW(req = model.create_infer_request()); - OV_ASSERT_NO_THROW(req.set_tensor(inputName, inputTensor)); + OV_ASSERT_NO_THROW( + req.set_tensor(inputName, + {inputTensor.get_element_type(), inputTensor.get_shape(), std::as_const(inputTensor).data()})); OV_ASSERT_NO_THROW(req.infer()); OV_ASSERT_NO_THROW(checkOutputFP16(inputTensor, req.get_tensor(outputName))); From 1c2c6d1e8e1fddbdba3adebe51214fc7fb5437fd Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 08:57:40 +0000 Subject: [PATCH 06/14] Apply code style Signed-off-by: Pawel Raasz --- src/plugins/template/src/sync_infer_request.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index 0470e0179eb929..798446d0930aeb 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -182,10 +182,10 @@ void ov::template_plugin::InferRequest::infer_preprocess() { data); } else if (tensor->is_continuous()) { // No ROI extraction is needed - m_backend_input_tensors[i] = get_template_model()->get_template_plugin()->m_backend->create_tensor( - tensor->get_element_type(), - tensor->get_shape(), - std::as_const(*tensor).data()); + m_backend_input_tensors[i] = + get_template_model()->get_template_plugin()->m_backend->create_tensor(tensor->get_element_type(), + tensor->get_shape(), + std::as_const(*tensor).data()); } else { OPENVINO_ASSERT(tensor->get_element_type().bitwidth() % 8 == 0, "Template plugin: Unsupported ROI tensor with element type having ", From 
d7fa02aaf93a442859d7bc9476e0c478d7f76b53 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 4 Dec 2025 11:50:05 +0000 Subject: [PATCH 07/14] Fix einsum ref impl Signed-off-by: Pawel Raasz --- src/core/reference/src/op/einsum.cpp | 26 +++++++++---------- .../src/common/lora_pattern.cpp | 1 + 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/core/reference/src/op/einsum.cpp b/src/core/reference/src/op/einsum.cpp index e83a8292b0d856..c950414a7ca9c9 100644 --- a/src/core/reference/src/op/einsum.cpp +++ b/src/core/reference/src/op/einsum.cpp @@ -580,9 +580,9 @@ void extract_diagonal(ov::TensorVector& inputs, std::vector& input_ return; } - ov::Tensor multi_identity = build_multi_identity(input_ptr, repeated_labels, label_dim_map); + const ov::Tensor multi_identity = build_multi_identity(input_ptr, repeated_labels, label_dim_map); - ov::Tensor mul_output = input_ptr; + auto mul_output = ov::Tensor(input_ptr.get_element_type(), input_ptr.get_shape()); reference::multiply(input_ptr.data(), multi_identity.data(), mul_output.data(), @@ -926,17 +926,17 @@ void contract_two_inputs(ov::TensorVector& inputs, reduced_sub_shape.get_shape(), is_separate_first2); - ov::Tensor matmul_operand1 = reshape_input_for_matmul(input1, - common_sub_shape, - separate1_sub_shape, - reduced_sub_shape_prod, - is_separate_first1); - - ov::Tensor matmul_operand2 = reshape_input_for_matmul(input2, - common_sub_shape, - separate2_sub_shape, - reduced_sub_shape_prod, - is_separate_first2); + const ov::Tensor matmul_operand1 = reshape_input_for_matmul(input1, + common_sub_shape, + separate1_sub_shape, + reduced_sub_shape_prod, + is_separate_first1); + + const ov::Tensor matmul_operand2 = reshape_input_for_matmul(input2, + common_sub_shape, + separate2_sub_shape, + reduced_sub_shape_prod, + is_separate_first2); // step 3. 
apply MatMul operation for formatted inputs Shape matmul_output_shape = compute_matmul_output_shape(common_sub_shape, separate1_sub_shape, separate2_sub_shape); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp index c16e0cc306bdd8..943b4014fcc438 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp @@ -127,6 +127,7 @@ static std::string getTestCaseName(const testing::TestParamInfo Date: Thu, 4 Dec 2025 13:31:33 +0000 Subject: [PATCH 08/14] Fix Read value get input tensor Signed-off-by: Pawel Raasz --- src/core/src/op/read_value.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/core/src/op/read_value.cpp b/src/core/src/op/read_value.cpp index a05c994be1cf8b..175855e05ed61e 100644 --- a/src/core/src/op/read_value.cpp +++ b/src/core/src/op/read_value.cpp @@ -150,21 +150,22 @@ bool ReadValue::evaluate(TensorVector& outputs, const auto use_context = var_value != variable_values.end() && !var_value->second->get_reset(); auto& output = outputs[0]; - Tensor input; - if (use_context) { - input = var_value->second->get_state(); - } else { - if (!inputs.empty()) { - input = inputs[0]; + const auto& input = [&] { + if (use_context) { + return var_value->second->get_state(); + } else if (!inputs.empty()) { + return inputs[0]; } else { - auto var_info = m_variable->get_info(); + const auto var_info = m_variable->get_info(); OPENVINO_ASSERT(var_info.data_shape.is_static() && var_info.data_type.is_static()); const auto& shape = var_info.data_shape.get_shape(); const auto& type = var_info.data_type; - input = ov::Tensor(type, shape); + auto input = ov::Tensor(type, shape); memset(input.data(), 0, input.get_byte_size()); + return input; } - } + }(); + output.set_shape(input.get_shape()); std::memcpy(output.data(), input.data(), output.get_byte_size()); return true; From f5255f6932ebf5bf0a4bd92bce0760642075cd47 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 5 Dec 2025 11:03:36 +0000 Subject: [PATCH 09/14] Add new data member in ITensor: - public interface check if tensor can write data - ITensor::data() do not do check to be compatible with current implementation and it is for internal use Signed-off-by: Pawel Raasz --- src/core/dev_api/openvino/runtime/itensor.hpp | 22 +++++++++++-- src/core/include/openvino/runtime/tensor.hpp | 7 +++-- src/core/src/runtime/tensor.cpp | 4 +-- .../openvino/runtime/iremote_tensor.hpp | 8 +++++ src/inference/src/dev/make_tensor.cpp | 31 +++++++++++++++++-- src/plugins/intel_cpu/src/cpu_tensor.cpp | 13 ++++++++ src/plugins/intel_cpu/src/cpu_tensor.h | 2 ++ .../intel_gpu/plugin/usm_host_tensor.hpp | 2 ++ .../intel_gpu/src/plugin/usm_host_tensor.cpp | 8 +++++ .../src/backend/include/zero_tensor.hpp | 3 ++ .../intel_npu/src/backend/src/zero_tensor.cpp | 8 +++++ .../intel_npu/utils/zero/zero_host_tensor.hpp | 2 ++ .../src/utils/src/zero/zero_host_tensor.cpp | 8 +++++ 13 files changed, 109 insertions(+), 9 deletions(-) diff --git a/src/core/dev_api/openvino/runtime/itensor.hpp b/src/core/dev_api/openvino/runtime/itensor.hpp index f57c3259cbfa7e..672500d58edaaf 100644 --- a/src/core/dev_api/openvino/runtime/itensor.hpp +++ b/src/core/dev_api/openvino/runtime/itensor.hpp @@ -60,7 +60,15 @@ class OPENVINO_API ITensor : public 
std::enable_shared_from_this { /** * @brief Provides an access to the underlying host memory - * @param type Optional type parameter. + * @note The method throws an exception: + * - if tensor implementation does not allow non-const access to memory. + * @return A host pointer to tensor memory + */ + virtual void* data_rw() = 0; + + /** + * @brief Provides an access to the underlying host memory + * @param type Type parameter. * @note The method throws an exception * if specified type's fundamental type does not match with tensor element type's fundamental type * @return A host pointer to tensor memory @@ -70,10 +78,20 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this { virtual const void* data(const element::Type& type) const = 0; /// @} + /** + * @brief Provides an access to the underlying host memory + * @param type Type parameter. + * @note The method throws an exception: + * - if specified type's fundamental type does not match with tensor element type's fundamental type + * - if tensor implementation does not allow non-const access to memory. + * @return A host pointer to tensor memory + */ + virtual void* data_rw(const element::Type& type) = 0; + /** * @brief Provides an access to the underlying host memory casted to type `T` - * @return A host pointer to tensor memory casted to specified type `T`. * @note Throws exception if specified type does not match with tensor element type + * @return A host pointer to tensor memory casted to specified type `T`. */ template ::type> T* data() { diff --git a/src/core/include/openvino/runtime/tensor.hpp b/src/core/include/openvino/runtime/tensor.hpp index 211d3b3328a75e..a1471c9ba4f415 100644 --- a/src/core/include/openvino/runtime/tensor.hpp +++ b/src/core/include/openvino/runtime/tensor.hpp @@ -219,6 +219,8 @@ class OPENVINO_API Tensor { /** * @brief Provides an access to the underlying host memory + * @note The method throws an exception: + * - if tensor implementation does not allow non-const access to memory. * @return A host pointer to tensor memory * @{ */ @@ -229,8 +231,9 @@ class OPENVINO_API Tensor { /** * @brief Provides an access to the underlying host memory * @param type Optional type parameter. - * @note The method throws an exception - * if specified type's fundamental type does not match with tensor element type's fundamental type + * @note The method throws an exception: + * - if specified type's fundamental type does not match with tensor element type's fundamental type + * - if tensor implementation does not allow non-const access to memory. 
* @return A host pointer to tensor memory * @{ */ diff --git a/src/core/src/runtime/tensor.cpp b/src/core/src/runtime/tensor.cpp index a172949dded2a1..6bae37045cb430 100644 --- a/src/core/src/runtime/tensor.cpp +++ b/src/core/src/runtime/tensor.cpp @@ -106,7 +106,7 @@ size_t Tensor::get_byte_size() const { } void* Tensor::data() { - OV_TENSOR_STATEMENT(return _impl->data()); + OV_TENSOR_STATEMENT(return _impl->data_rw()); } const void* Tensor::data() const { @@ -114,7 +114,7 @@ const void* Tensor::data() const { } void* Tensor::data(const element::Type& element_type) { - OV_TENSOR_STATEMENT(return _impl->data(element_type)); + OV_TENSOR_STATEMENT(return _impl->data_rw(element_type)); } const void* Tensor::data(const element::Type& element_type) const { diff --git a/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp b/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp index 18b46e916a9e9b..7a88775a0d8f2f 100644 --- a/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp +++ b/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp @@ -32,6 +32,14 @@ class OPENVINO_RUNTIME_API IRemoteTensor : public ITensor { OPENVINO_NOT_IMPLEMENTED; } + void* data_rw() override final { + OPENVINO_NOT_IMPLEMENTED; + } + + void* data_rw(const element::Type&) override final { + OPENVINO_NOT_IMPLEMENTED; + } + ~IRemoteTensor() override; /** diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index 834aac9ca08beb..72c19981a527c0 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -84,6 +84,19 @@ class ViewTensor : public ITensor { return m_ptr; } + void* data_rw() override { + return m_ptr; + } + + void* data_rw(const element::Type& element_type) override { + OPENVINO_ASSERT(is_pointer_representable(element_type), + "Tensor data with element type ", + get_element_type(), + ", is not representable as pointer to ", + element_type); + return m_ptr; + } + const element::Type& get_element_type() const override { return m_element_type; } @@ -157,11 +170,11 @@ class ReadOnlyViewTensor : public ViewTensor { using ViewTensor::data; - [[noreturn]] void* data() override { + [[noreturn]] void* data_rw() override { OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data()'"); } - [[noreturn]] void* data(const element::Type& element_type) override { + [[noreturn]] void* data_rw(const element::Type& element_type) override { OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data(element_type)'"); } }; @@ -231,7 +244,11 @@ class ReadOnlyStridedViewTensor : public StridedViewTensor { using StridedViewTensor::data; - [[noreturn]] void* data(const element::Type& element_type) override { + [[noreturn]] void* data_rw() override { + OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data()'"); + } + + [[noreturn]] void* data_rw(const element::Type& element_type) override { OPENVINO_THROW("Can not access non-const pointer use e.g. 
'static_cast.data()'"); } }; @@ -463,6 +480,14 @@ class RoiTensor : public BaseRoiTensor, public ITensor { const void* data(const element::Type& element_type) const override { return static_cast(m_owner->data()) + m_offset; } + + void* data_rw() override { + return static_cast(m_owner->data_rw()) + m_offset; + } + + void* data_rw(const element::Type& element_type) override { + return static_cast(m_owner->data_rw(element_type)) + m_offset; + } }; /** diff --git a/src/plugins/intel_cpu/src/cpu_tensor.cpp b/src/plugins/intel_cpu/src/cpu_tensor.cpp index 4d11c21631001f..341371e9bd6dd9 100644 --- a/src/plugins/intel_cpu/src/cpu_tensor.cpp +++ b/src/plugins/intel_cpu/src/cpu_tensor.cpp @@ -125,6 +125,19 @@ const void* Tensor::data(const element::Type& element_type) const { return m_memptr->getData(); } +void* Tensor::data_rw() { + return m_memptr->getData(); +} + +void* Tensor::data_rw(const element::Type& element_type) { + OPENVINO_ASSERT(is_pointer_representable(get_element_type(), element_type), + "Tensor data with element type ", + get_element_type(), + ", is not representable as pointer to ", + element_type); + return m_memptr->getData(); +} + /** * @brief Creates tensor on graph memory * diff --git a/src/plugins/intel_cpu/src/cpu_tensor.h b/src/plugins/intel_cpu/src/cpu_tensor.h index a36c4363546d53..1cac1fbc383cc8 100644 --- a/src/plugins/intel_cpu/src/cpu_tensor.h +++ b/src/plugins/intel_cpu/src/cpu_tensor.h @@ -37,6 +37,8 @@ class Tensor : public ITensor { void* data(const element::Type& type) override; const void* data() const override; const void* data(const element::Type& type) const override; + void* data_rw() override; + void* data_rw(const element::Type& type) override; MemoryPtr get_memory() { return m_memptr; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp index 6d86a8f4a0a3cb..9457aa154d52ce 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp @@ -22,6 +22,8 @@ class USMHostTensor : public ov::ITensor { void* data() override; void* data(const element::Type& element_type) override; + void* data_rw() override; + void* data_rw(const element::Type& element_type) override; const void* data() const override; const void* data(const element::Type& element_type) const override; diff --git a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp index a439e10dd040e9..f991d0aee4c0e1 100644 --- a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp @@ -23,6 +23,14 @@ void* USMHostTensor::data(const element::Type&) { return m_impl->get_original_memory_buf_ptr(); } +void* USMHostTensor::data_rw() { + return m_impl->get_original_memory_buf_ptr(); +} + +void* USMHostTensor::data_rw(const element::Type&) { + return m_impl->get_original_memory_buf_ptr(); +} + const void* USMHostTensor::data() const { return m_impl->get_original_memory_buf_ptr(); } diff --git a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp index f09a7c32cb4643..42f00062af3e67 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp @@ -52,6 +52,9 @@ class ZeroTensor final : public ov::ITensor { void* data() override; void* data(const ov::element::Type& type) override; + void* 
data_rw() override; + void* data_rw(const ov::element::Type& type) override; + const void* data() const override; const void* data(const ov::element::Type& type) const override; diff --git a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp index f7782817bfdee0..9f81050c9971a1 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp @@ -100,6 +100,14 @@ void* ZeroTensor::data(const ov::element::Type& type) { return data(); } +void* ZeroTensor::data_rw() { + return data(); +} + +void* ZeroTensor::data_rw(const ov::element::Type& type) { + return data(type); +} + const void* ZeroTensor::data() const { return _ptr; } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp index 01199426b6f77b..7baa67164a6bfa 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp @@ -22,6 +22,8 @@ class ZeroHostTensor : public ov::ITensor { void* data() override; void* data(const ov::element::Type& element_type) override; + void* data_rw() override; + void* data_rw(const ov::element::Type& element_type) override; const void* data() const override; const void* data(const ov::element::Type& element_type) const override; const ov::element::Type& get_element_type() const override; diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp index c82331ede6415b..db77d89e832eee 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp @@ -29,6 +29,14 @@ void* ZeroHostTensor::data(const ov::element::Type&) { return _impl->get_original_memory(); } +void* ZeroHostTensor::data_rw() { + return data(); +} + +void* ZeroHostTensor::data_rw(const ov::element::Type&) { + return data(); +} + const void* ZeroHostTensor::data() const { return _impl->get_original_memory(); } From 497cf21b418272e47e8cc2ffe243459882052a56 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 5 Dec 2025 11:04:26 +0000 Subject: [PATCH 10/14] Revert not required changes Signed-off-by: Pawel Raasz --- src/inference/src/dev/isync_infer_request.cpp | 4 ++-- src/plugins/intel_cpu/src/graph.cpp | 2 +- src/plugins/intel_cpu/src/infer_request.cpp | 13 +++++-------- .../intel_gpu/src/plugin/sync_infer_request.cpp | 13 +++++-------- .../intel_npu/src/common/src/sync_infer_request.cpp | 6 +++--- src/plugins/template/backend/backend.hpp | 6 ------ src/plugins/template/backend/int_backend.cpp | 6 ------ src/plugins/template/backend/int_backend.hpp | 1 - src/plugins/template/src/sync_infer_request.cpp | 2 +- .../behavior/ov_infer_request/batched_tensors.cpp | 1 - 10 files changed, 17 insertions(+), 37 deletions(-) diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index 55cd783c587ee1..df70252e3dd160 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -192,7 +192,7 @@ void ov::ISyncInferRequest::convert_batched_tensors() { // Perform memory copy ov::parallel_for(item.second.size(), [&](size_t i) { const auto& tensor = item.second.at(i); - memcpy(ptr + i * tensor->get_byte_size(), std::as_const(*tensor).data(), 
tensor->get_byte_size()); + memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size()); }); prepared_tensors[item.first] = input_tensor; } @@ -286,7 +286,7 @@ void ov::ISyncInferRequest::check_tensor(const ov::Output& port, port.get_shape(), "."); OPENVINO_ASSERT(std::dynamic_pointer_cast(tensor._ptr) || - std::as_const(*tensor).data() != nullptr || is_dynamic, + tensor->data() != nullptr || is_dynamic, "Tensor data equal nullptr!"); } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index adefd70b0d4c55..337ff590e852b8 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1220,7 +1220,7 @@ void Graph::PushInputData(const std::size_t& index, const ov::SoPtr& in auto childEdge = node->getChildEdgeAt(0); const auto& edgeMemory = childEdge->getMemory(); - const void* ext_data_ptr = std::as_const(*input).data(); + const void* ext_data_ptr = input->data(); void* inter_data_ptr = edgeMemory.getData(); if (ext_data_ptr != inter_data_ptr) { diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index db5a4d90de0877..438007c1db3ef4 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -171,7 +171,7 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& auto memBlock = mem->getMemoryBlock(); OPENVINO_ASSERT(memBlock); // Use const cast as as `MemoryBlockPtr` not supports const pointers. The model inputs may have const pointers. - memBlock->setExtBuff(const_cast(std::as_const(*tensor).data()), tensor->get_byte_size()); + memBlock->setExtBuff(tensor->data(), tensor->get_byte_size()); } } @@ -181,7 +181,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { if (graph.IsDynamic()) { changeInpPtr = [&inputPtrs](const EdgePtr& edge, ov::SoPtr& tensor) { change_edge_ptr(edge, tensor); - inputPtrs.insert(std::as_const(*tensor._ptr).data()); + inputPtrs.insert(tensor->data()); }; } else { changeInpPtr = [](const EdgePtr& edge, ov::SoPtr& tensor) { @@ -192,7 +192,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { for (auto& [idx, tensor] : m_input_external_ptr) { auto inputNodePtr = graph.getInputNodeByIndex(idx); OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", idx); - if (inputNodePtr->getDstDataAtPort(0) == std::as_const(*tensor).data()) { + if (inputNodePtr->getDstDataAtPort(0) == tensor->data()) { continue; } const auto& childEdges = inputNodePtr->getChildEdges(); @@ -365,18 +365,15 @@ void SyncInferRequest::set_tensor(const ov::Output& in_port, con OPENVINO_ASSERT(in_tensor, "Failed to set empty tensor for port!"); auto port = get_internal_port(in_port); auto tensor = in_tensor; - auto port_found = find_port(in_port); // WA: legacy api create blob with ANY layout will not set BlockingDesc, which will lead to tensor.get_shape() // return empty shape but tensor.get_size() return correct value, and tensor.reshape() cannot update // BlockingDesc, so to construct new tensor with original tensor's data, which is only for ov legacy api usage. if (in_port.get_partial_shape().is_static() && in_tensor->get_size() > 0 && in_tensor->get_shape().empty() && in_tensor->get_size() == ov::shape_size(in_port.get_shape()) && !in_port.get_shape().empty()) { - tensor = ov::make_tensor(in_tensor->get_element_type(), - in_port.get_shape(), - (port_found.is_input() ? 
std::as_const(*in_tensor).data() : in_tensor->data())); + tensor = ov::make_tensor(in_tensor->get_element_type(), in_port.get_shape(), in_tensor->data()); } - + auto port_found = find_port(in_port); auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor); if (port_found.is_input()) { auto input_index = port_found.idx; diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 4988dcbf9f9cbb..f2fbd43ee1c1c8 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -533,8 +533,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe // Note: currently, using USM Host memory for dGPUs in some scenarios (LLMs) leads to performance degradation, // so apply wider USM Host memory type detection only for iGPUs - auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(std::as_const(*user_tensor).data()) - : cldnn::allocation_type::unknown; + auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data()) : cldnn::allocation_type::unknown; auto usm_host_raw_ptr = engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu && user_tensor_mem_type == cldnn::allocation_type::usm_host; @@ -709,7 +708,7 @@ std::vector SyncInferRequest::prepare_batched_input(size_t in auto ptr = static_cast(merged_tensor->data()); ov::parallel_for(user_tensors.size(), [&](size_t i) { const auto& tensor = user_tensors.at(i); - std::memcpy(ptr + i * tensor->get_byte_size(), std::as_const(*tensor).data(), tensor->get_byte_size()); + std::memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size()); }); } else { const auto& stream = m_graph->get_network()->get_stream(); @@ -788,13 +787,11 @@ std::vector SyncInferRequest::prepare_input(const std::string } } else if (is_usm_host_tensor && !convert_needed) { if (element_type != ::data_type_for_remote_tensor(element_type)) { - // use const_cast RemoteTensorImpl dont accept const data ptr - // The model inputs can be read-only and const pointer should be used m_plugin_inputs[input_idx] = {std::make_shared(m_context, user_tensor->get_shape(), ::data_type_for_remote_tensor(element_type), TensorType::BT_USM_SHARED, - const_cast(std::as_const(*user_tensor).data())), + user_tensor->data()), TensorOwner::USER}; } else { m_plugin_inputs[input_idx] = { usm_host_ptr->get_impl(), user_tensor_wrapper.owner }; @@ -804,7 +801,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto user_tensor_mem_type = cldnn::allocation_type::unknown; if (!is_remote_tensor_impl && !is_generic_remote) { - user_tensor_mem_type = engine.detect_usm_allocation_type(std::as_const(*user_tensor_wrapper.ptr).data()); + user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor->data()); } auto plugin_tensor_mem_type = cldnn::allocation_type::unknown; @@ -879,7 +876,7 @@ std::vector SyncInferRequest::prepare_input(const std::string } } else { if (!is_remote_tensor_impl && !is_generic_remote) { - auto src_ptr = static_cast(std::as_const(*user_tensor).data()); + auto src_ptr = static_cast(user_tensor->data()); if (!same_host_mem(memory, src_ptr)) { // WA: Set need_lockable_mem as a blocking argument // The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp 
b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index dbefc8bfa64096..d3eed4e7357005 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -224,9 +224,9 @@ void SyncInferRequest::check_tensor(const ov::Output& port, " expecting ", port.get_shape(), "."); - OPENVINO_ASSERT(std::dynamic_pointer_cast(tensor._ptr) || - std::as_const(*tensor).data() != nullptr || is_dynamic, - "Tensor data equal nullptr!"); + OPENVINO_ASSERT( + std::dynamic_pointer_cast(tensor._ptr) || tensor->data() != nullptr || is_dynamic, + "Tensor data equal nullptr!"); } void SyncInferRequest::check_batched_tensors(const ov::Output& port, diff --git a/src/plugins/template/backend/backend.hpp b/src/plugins/template/backend/backend.hpp index 1d5f430beb3f7b..9273c66216d8b0 100644 --- a/src/plugins/template/backend/backend.hpp +++ b/src/plugins/template/backend/backend.hpp @@ -52,16 +52,10 @@ class Backend { /// must be sufficient to contain the tensor. The lifetime of the buffer is the /// responsibility of the caller. /// \returns shared_ptr to a new backend-specific tensor - /// \{ virtual ov::Tensor create_tensor(const ov::element::Type& element_type, const Shape& shape, void* memory_pointer) = 0; - virtual ov::Tensor create_tensor(const ov::element::Type& element_type, - const Shape& shape, - const void* memory_pointer) = 0; - /// \} - /// \brief Create a tensor of C type T specific to this backend /// \param shape The shape of the tensor /// \returns shared_ptr to a new backend specific tensor diff --git a/src/plugins/template/backend/int_backend.cpp b/src/plugins/template/backend/int_backend.cpp index 4aca1d6926e58f..a2b008398140b5 100644 --- a/src/plugins/template/backend/int_backend.cpp +++ b/src/plugins/template/backend/int_backend.cpp @@ -25,12 +25,6 @@ ov::Tensor ov::runtime::interpreter::INTBackend::create_tensor(const element::Ty return ov::Tensor(type, shape, memory_pointer); } -ov::Tensor ov::runtime::interpreter::INTBackend::create_tensor(const element::Type& type, - const Shape& shape, - const void* memory_pointer) { - return ov::Tensor(type, shape, memory_pointer); -} - std::shared_ptr ov::runtime::interpreter::INTBackend::compile( std::shared_ptr model) { return std::make_shared(model); diff --git a/src/plugins/template/backend/int_backend.hpp b/src/plugins/template/backend/int_backend.hpp index e728e7a96ba593..942ba0bb8d11d1 100644 --- a/src/plugins/template/backend/int_backend.hpp +++ b/src/plugins/template/backend/int_backend.hpp @@ -28,7 +28,6 @@ class INTBackend : public Backend { ov::Tensor create_tensor() override; ov::Tensor create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override; - ov::Tensor create_tensor(const element::Type& type, const Shape& shape, const void* memory_pointer) override; ov::Tensor create_tensor(const element::Type& type, const Shape& shape) override; diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index 798446d0930aeb..938a79c9c6a2cd 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -185,7 +185,7 @@ void ov::template_plugin::InferRequest::infer_preprocess() { m_backend_input_tensors[i] = get_template_model()->get_template_plugin()->m_backend->create_tensor(tensor->get_element_type(), tensor->get_shape(), - std::as_const(*tensor).data()); + tensor->data()); } else { 
OPENVINO_ASSERT(tensor->get_element_type().bitwidth() % 8 == 0, "Template plugin: Unsupported ROI tensor with element type having ", diff --git a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp index 2cf9b8d9cb20d5..3abd13fa316d1c 100644 --- a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp +++ b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp @@ -117,7 +117,6 @@ TEST_P(OVInferRequestBatchedTests, SetReadOnlyInputTensorsBase) { ov::InferRequest req; req = execNet.create_infer_request(); std::vector tensors; - auto exp_tensor = ov::Tensor(element::f32, batch_shape); for (auto i = 0; i < batch; ++i) { // non contiguous memory (i*2) auto tensor = ov::Tensor(element::f32, one_shape, &std::as_const(buffer)[(i * 2) * one_shape_size]); From a302c9cc214753770aad16facb39275626161f19 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 5 Dec 2025 11:15:14 +0000 Subject: [PATCH 11/14] Revert changes in infer request CPU Signed-off-by: Pawel Raasz --- src/plugins/intel_cpu/src/infer_request.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 438007c1db3ef4..2556c9da699a6f 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -170,7 +170,6 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& } else { auto memBlock = mem->getMemoryBlock(); OPENVINO_ASSERT(memBlock); - // Use const cast as as `MemoryBlockPtr` not supports const pointers. The model inputs may have const pointers. 
memBlock->setExtBuff(tensor->data(), tensor->get_byte_size()); } } @@ -189,10 +188,10 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { }; } - for (auto& [idx, tensor] : m_input_external_ptr) { - auto inputNodePtr = graph.getInputNodeByIndex(idx); - OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", idx); - if (inputNodePtr->getDstDataAtPort(0) == tensor->data()) { + for (auto& it : m_input_external_ptr) { + auto inputNodePtr = graph.getInputNodeByIndex(it.first); + OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", it.first); + if (inputNodePtr->getDstDataAtPort(0) == it.second->data()) { continue; } const auto& childEdges = inputNodePtr->getChildEdges(); @@ -231,7 +230,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { if (!e) { OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); } - changeInpPtr(e, tensor); + changeInpPtr(e, it.second); } } } From b09a25e459b4a62843cbfec2ad7fde8a17d62424 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 5 Dec 2025 11:17:58 +0000 Subject: [PATCH 12/14] Fix code style Signed-off-by: Pawel Raasz --- src/inference/src/dev/isync_infer_request.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index df70252e3dd160..8bed393e9c21e2 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -285,9 +285,9 @@ void ov::ISyncInferRequest::check_tensor(const ov::Output& port, " expecting ", port.get_shape(), "."); - OPENVINO_ASSERT(std::dynamic_pointer_cast(tensor._ptr) || - tensor->data() != nullptr || is_dynamic, - "Tensor data equal nullptr!"); + OPENVINO_ASSERT( + std::dynamic_pointer_cast(tensor._ptr) || tensor->data() != nullptr || is_dynamic, + "Tensor data equal nullptr!"); } void ov::ISyncInferRequest::allocate_tensor( From 033039c96df9df78d153f928810fdf439674e582 Mon Sep 17 00:00:00 2001 From: "Raasz, Pawel" Date: Tue, 9 Dec 2025 11:34:37 +0000 Subject: [PATCH 13/14] Update NPU test for read-only tensor Signed-off-by: Raasz, Pawel --- .../ov_infer_request/infer_request_dynamic.hpp | 4 +--- .../behavior/ov_infer_request/infer_request_run.hpp | 13 +++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp index 392b5fc923ede4..3ab240e334162a 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_dynamic.hpp @@ -166,9 +166,7 @@ TEST_P(InferRequestDynamicTests, InferDynamicNetworkSetShapeCPUTensor) { auto inputTensor = ov::test::utils::create_and_fill_tensor(ov::element::f32, originalShape, 100, 0); OV_ASSERT_NO_THROW(req = model.create_infer_request()); - OV_ASSERT_NO_THROW( - req.set_tensor(inputName, - {inputTensor.get_element_type(), inputTensor.get_shape(), std::as_const(inputTensor).data()})); + OV_ASSERT_NO_THROW(req.set_tensor(inputName, inputTensor)); OV_ASSERT_NO_THROW(req.infer()); OV_ASSERT_NO_THROW(checkOutputFP16(inputTensor, req.get_tensor(outputName))); diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp index 
eb6bc48db85c13..8a5f6436fc213d 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
@@ -336,6 +336,19 @@ TEST_P(InferRequestRunTests, RecreateL0TensorIfNeeded) {
     }
 }
 
+TEST_P(InferRequestRunTests, RunWithConstData) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+
+    ov::CompiledModel compiled_model;
+    ov::InferRequest inference_request;
+    OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, target_device, configuration));
+    OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
+    const auto tensor = inference_request.get_input_tensor();
+    const std::vector<float> data(tensor.get_size());
+    OV_ASSERT_NO_THROW(inference_request.set_input_tensor({ov::element::f32, tensor.get_shape(), data.data()}));
+    OV_ASSERT_NO_THROW(inference_request.infer());
+}
+
 using RandomTensorOverZeroTensorRunTests = InferRequestRunTests;
 
 TEST_P(RandomTensorOverZeroTensorRunTests, SetRandomTensorOverZeroTensor0) {

From 4fae83670436ebbbaa04930dadbc97dc92368a3b Mon Sep 17 00:00:00 2001
From: "Raasz, Pawel"
Date: Tue, 9 Dec 2025 11:36:48 +0000
Subject: [PATCH 14/14] Update const input buffer size in RunWithConstData test

Signed-off-by: Raasz, Pawel
---
 .../functional/behavior/ov_infer_request/infer_request_run.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
index 8a5f6436fc213d..2cbfa2aefbb2f8 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
@@ -344,7 +344,7 @@ TEST_P(InferRequestRunTests, RunWithConstData) {
     OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, target_device, configuration));
     OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
     const auto tensor = inference_request.get_input_tensor();
-    const std::vector<float> data(tensor.get_size());
+    const std::vector<float> data(tensor.get_byte_size() / sizeof(float));
    OV_ASSERT_NO_THROW(inference_request.set_input_tensor({ov::element::f32, tensor.get_shape(), data.data()}));
     OV_ASSERT_NO_THROW(inference_request.infer());
 }
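
Usage sketch (illustrative, not taken from any patch above): how the read-only input path introduced by this series is expected to look from application code. It assumes `compiled_model` is an ov::CompiledModel with a single statically shaped f32 input and that the const-pointer ov::Tensor constructor exercised by the new tests is available; the function name `run_with_const_input` is made up for the example.

// Feeding a caller-owned, read-only host buffer as a model input; error handling omitted.
#include <openvino/openvino.hpp>

#include <utility>
#include <vector>

void run_with_const_input(ov::CompiledModel& compiled_model) {
    auto request = compiled_model.create_infer_request();
    const auto port = compiled_model.input();

    // The buffer is exposed through a const pointer, so the runtime only gets a read-only view.
    const std::vector<float> host_data(ov::shape_size(port.get_shape()), 0.0f);
    ov::Tensor input(port.get_element_type(), port.get_shape(), host_data.data());

    std::as_const(input).data();  // const access to the underlying memory is allowed
    // input.data();              // non-const access throws for a read-only view (see ReadOnlyViewTensor::data_rw above)

    request.set_input_tensor(input);
    request.infer();
}

The split introduced in patch 09 keeps this cheap for internal callers: `ITensor::data()` stays unchecked for plugin-internal use, while the public non-const `ov::Tensor::data()` routes through `data_rw()` and rejects tensors that only expose read-only memory.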