diff --git a/src/core/dev_api/openvino/runtime/itensor.hpp b/src/core/dev_api/openvino/runtime/itensor.hpp
index f57c3259cbfa7e..672500d58edaaf 100644
--- a/src/core/dev_api/openvino/runtime/itensor.hpp
+++ b/src/core/dev_api/openvino/runtime/itensor.hpp
@@ -60,7 +60,15 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {
     /**
      * @brief Provides an access to the underlying host memory
-     * @param type Optional type parameter.
+     * @note The method throws an exception:
+     *  - if tensor implementation does not allow non-const access to memory.
+     * @return A host pointer to tensor memory
+     */
+    virtual void* data_rw() = 0;
+
+    /**
+     * @brief Provides an access to the underlying host memory
+     * @param type Type parameter.
      * @note The method throws an exception
      *  if specified type's fundamental type does not match with tensor element type's fundamental type
      * @return A host pointer to tensor memory
@@ -70,10 +78,20 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {
     virtual const void* data(const element::Type& type) const = 0;
     /// @}
 
+    /**
+     * @brief Provides an access to the underlying host memory
+     * @param type Type parameter.
+     * @note The method throws an exception:
+     *  - if specified type's fundamental type does not match with tensor element type's fundamental type
+     *  - if tensor implementation does not allow non-const access to memory.
+     * @return A host pointer to tensor memory
+     */
+    virtual void* data_rw(const element::Type& type) = 0;
+
     /**
      * @brief Provides an access to the underlying host memory casted to type `T`
-     * @return A host pointer to tensor memory casted to specified type `T`.
      * @note Throws exception if specified type does not match with tensor element type
+     * @return A host pointer to tensor memory casted to specified type `T`.
      */
     template <typename T, typename datatype = typename std::decay<T>::type>
     T* data() {
diff --git a/src/core/include/openvino/runtime/tensor.hpp b/src/core/include/openvino/runtime/tensor.hpp
index 211d3b3328a75e..a1471c9ba4f415 100644
--- a/src/core/include/openvino/runtime/tensor.hpp
+++ b/src/core/include/openvino/runtime/tensor.hpp
@@ -219,6 +219,8 @@ class OPENVINO_API Tensor {
     /**
      * @brief Provides an access to the underlying host memory
+     * @note The method throws an exception:
+     *  - if tensor implementation does not allow non-const access to memory.
      * @return A host pointer to tensor memory
      * @{
      */
@@ -229,8 +231,9 @@ class OPENVINO_API Tensor {
     /**
      * @brief Provides an access to the underlying host memory
      * @param type Optional type parameter.
-     * @note The method throws an exception
-     * if specified type's fundamental type does not match with tensor element type's fundamental type
+     * @note The method throws an exception:
+     *  - if specified type's fundamental type does not match with tensor element type's fundamental type
+     *  - if tensor implementation does not allow non-const access to memory.
      * @return A host pointer to tensor memory
      * @{
      */
diff --git a/src/core/reference/src/op/einsum.cpp b/src/core/reference/src/op/einsum.cpp
index e83a8292b0d856..c950414a7ca9c9 100644
--- a/src/core/reference/src/op/einsum.cpp
+++ b/src/core/reference/src/op/einsum.cpp
@@ -580,9 +580,9 @@ void extract_diagonal(ov::TensorVector& inputs, std::vector& input_
         return;
     }
 
-    ov::Tensor multi_identity = build_multi_identity(input_ptr, repeated_labels, label_dim_map);
+    const ov::Tensor multi_identity = build_multi_identity(input_ptr, repeated_labels, label_dim_map);
 
-    ov::Tensor mul_output = input_ptr;
+    auto mul_output = ov::Tensor(input_ptr.get_element_type(), input_ptr.get_shape());
     reference::multiply(input_ptr.data(),
                         multi_identity.data(),
                         mul_output.data(),
@@ -926,17 +926,17 @@ void contract_two_inputs(ov::TensorVector& inputs,
                                                    reduced_sub_shape.get_shape(),
                                                    is_separate_first2);
 
-    ov::Tensor matmul_operand1 = reshape_input_for_matmul(input1,
-                                                          common_sub_shape,
-                                                          separate1_sub_shape,
-                                                          reduced_sub_shape_prod,
-                                                          is_separate_first1);
-
-    ov::Tensor matmul_operand2 = reshape_input_for_matmul(input2,
-                                                          common_sub_shape,
-                                                          separate2_sub_shape,
-                                                          reduced_sub_shape_prod,
-                                                          is_separate_first2);
+    const ov::Tensor matmul_operand1 = reshape_input_for_matmul(input1,
+                                                                common_sub_shape,
+                                                                separate1_sub_shape,
+                                                                reduced_sub_shape_prod,
+                                                                is_separate_first1);
+
+    const ov::Tensor matmul_operand2 = reshape_input_for_matmul(input2,
+                                                                common_sub_shape,
+                                                                separate2_sub_shape,
+                                                                reduced_sub_shape_prod,
+                                                                is_separate_first2);
 
     // step 3. apply MatMul operation for formatted inputs
     Shape matmul_output_shape = compute_matmul_output_shape(common_sub_shape, separate1_sub_shape, separate2_sub_shape);
diff --git a/src/core/src/op/read_value.cpp b/src/core/src/op/read_value.cpp
index a05c994be1cf8b..175855e05ed61e 100644
--- a/src/core/src/op/read_value.cpp
+++ b/src/core/src/op/read_value.cpp
@@ -150,21 +150,22 @@ bool ReadValue::evaluate(TensorVector& outputs,
     const auto use_context = var_value != variable_values.end() && !var_value->second->get_reset();
 
     auto& output = outputs[0];
-    Tensor input;
-    if (use_context) {
-        input = var_value->second->get_state();
-    } else {
-        if (!inputs.empty()) {
-            input = inputs[0];
+    const auto& input = [&] {
+        if (use_context) {
+            return var_value->second->get_state();
+        } else if (!inputs.empty()) {
+            return inputs[0];
         } else {
-            auto var_info = m_variable->get_info();
+            const auto var_info = m_variable->get_info();
             OPENVINO_ASSERT(var_info.data_shape.is_static() && var_info.data_type.is_static());
             const auto& shape = var_info.data_shape.get_shape();
             const auto& type = var_info.data_type;
-            input = ov::Tensor(type, shape);
+            auto input = ov::Tensor(type, shape);
             memset(input.data(), 0, input.get_byte_size());
+            return input;
         }
-    }
+    }();
+
     output.set_shape(input.get_shape());
     std::memcpy(output.data(), input.data(), output.get_byte_size());
     return true;
diff --git a/src/core/src/runtime/tensor.cpp b/src/core/src/runtime/tensor.cpp
index a172949dded2a1..6bae37045cb430 100644
--- a/src/core/src/runtime/tensor.cpp
+++ b/src/core/src/runtime/tensor.cpp
@@ -106,7 +106,7 @@ size_t Tensor::get_byte_size() const {
 }
 
 void* Tensor::data() {
-    OV_TENSOR_STATEMENT(return _impl->data());
+    OV_TENSOR_STATEMENT(return _impl->data_rw());
 }
 
 const void* Tensor::data() const {
@@ -114,7 +114,7 @@ const void* Tensor::data() const {
 }
 
 void* Tensor::data(const element::Type& element_type) {
-    OV_TENSOR_STATEMENT(return _impl->data(element_type));
+    OV_TENSOR_STATEMENT(return _impl->data_rw(element_type));
 }
 
 const void* Tensor::data(const element::Type& element_type) const {
diff --git a/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp b/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp
index 18b46e916a9e9b..7a88775a0d8f2f 100644
--- a/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp
+++ b/src/inference/dev_api/openvino/runtime/iremote_tensor.hpp
@@ -32,6 +32,14 @@ class OPENVINO_RUNTIME_API IRemoteTensor : public ITensor {
         OPENVINO_NOT_IMPLEMENTED;
     }
 
+    void* data_rw() override final {
+        OPENVINO_NOT_IMPLEMENTED;
+    }
+
+    void* data_rw(const element::Type&) override final {
+        OPENVINO_NOT_IMPLEMENTED;
+    }
+
     ~IRemoteTensor() override;
 
     /**
diff --git a/src/inference/dev_api/openvino/runtime/so_ptr.hpp b/src/inference/dev_api/openvino/runtime/so_ptr.hpp
index f1e3937743e311..dcd3952a281769 100644
--- a/src/inference/dev_api/openvino/runtime/so_ptr.hpp
+++ b/src/inference/dev_api/openvino/runtime/so_ptr.hpp
@@ -75,6 +75,14 @@ struct SoPtr {
         return _ptr.get();
     }
 
+    /**
+     * @brief Dereference stored pointer to T object.
+     * @return Reference to T object.
+     */
+    T& operator*() const noexcept {
+        return *_ptr;
+    }
+
     explicit operator bool() const noexcept {
         return _ptr != nullptr;
     }
diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp
index e6a89dc81c1874..8bed393e9c21e2 100644
--- a/src/inference/src/dev/isync_infer_request.cpp
+++ b/src/inference/src/dev/isync_infer_request.cpp
@@ -192,7 +192,7 @@ void ov::ISyncInferRequest::convert_batched_tensors() {
         // Perform memory copy
         ov::parallel_for(item.second.size(), [&](size_t i) {
             const auto& tensor = item.second.at(i);
-            memcpy(ptr + i * tensor->get_byte_size(), static_cast(tensor->data()), tensor->get_byte_size());
+            memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
         });
         prepared_tensors[item.first] = input_tensor;
     }
diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp
index 834aac9ca08beb..72c19981a527c0 100644
--- a/src/inference/src/dev/make_tensor.cpp
+++ b/src/inference/src/dev/make_tensor.cpp
@@ -84,6 +84,19 @@ class ViewTensor : public ITensor {
         return m_ptr;
     }
 
+    void* data_rw() override {
+        return m_ptr;
+    }
+
+    void* data_rw(const element::Type& element_type) override {
+        OPENVINO_ASSERT(is_pointer_representable(element_type),
+                        "Tensor data with element type ",
+                        get_element_type(),
+                        ", is not representable as pointer to ",
+                        element_type);
+        return m_ptr;
+    }
+
     const element::Type& get_element_type() const override {
         return m_element_type;
     }
@@ -157,11 +170,11 @@ class ReadOnlyViewTensor : public ViewTensor {
 
     using ViewTensor::data;
 
-    [[noreturn]] void* data() override {
+    [[noreturn]] void* data_rw() override {
         OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data()'");
     }
 
-    [[noreturn]] void* data(const element::Type& element_type) override {
+    [[noreturn]] void* data_rw(const element::Type& element_type) override {
         OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data(element_type)'");
     }
 };
@@ -231,7 +244,11 @@ class ReadOnlyStridedViewTensor : public StridedViewTensor {
 
     using StridedViewTensor::data;
 
-    [[noreturn]] void* data(const element::Type& element_type) override {
+    [[noreturn]] void* data_rw() override {
+        OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data()'");
+    }
+
+    [[noreturn]] void* data_rw(const element::Type& element_type) override {
         OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast.data()'");
     }
 };
@@ -463,6 +480,14 @@ class RoiTensor : public BaseRoiTensor, public ITensor {
     const void* data(const element::Type& element_type) const override {
         return static_cast(m_owner->data()) + m_offset;
     }
+
+    void* data_rw() override {
+        return static_cast(m_owner->data_rw()) + m_offset;
+    }
+
+    void* data_rw(const element::Type& element_type) override {
+        return static_cast(m_owner->data_rw(element_type)) + m_offset;
+    }
 };
 
 /**
diff --git a/src/plugins/intel_cpu/src/cpu_tensor.cpp b/src/plugins/intel_cpu/src/cpu_tensor.cpp
index 4d11c21631001f..341371e9bd6dd9 100644
--- a/src/plugins/intel_cpu/src/cpu_tensor.cpp
+++ b/src/plugins/intel_cpu/src/cpu_tensor.cpp
@@ -125,6 +125,19 @@ const void* Tensor::data(const element::Type& element_type) const {
     return m_memptr->getData();
 }
 
+void* Tensor::data_rw() {
+    return m_memptr->getData();
+}
+
+void* Tensor::data_rw(const element::Type& element_type) {
+    OPENVINO_ASSERT(is_pointer_representable(get_element_type(), element_type),
+                    "Tensor data with element type ",
+                    get_element_type(),
+                    ", is not representable as pointer to ",
+                    element_type);
+    return m_memptr->getData();
+}
+
 /**
  * @brief Creates tensor on graph memory
  *
diff --git a/src/plugins/intel_cpu/src/cpu_tensor.h b/src/plugins/intel_cpu/src/cpu_tensor.h
index a36c4363546d53..1cac1fbc383cc8 100644
--- a/src/plugins/intel_cpu/src/cpu_tensor.h
+++ b/src/plugins/intel_cpu/src/cpu_tensor.h
@@ -37,6 +37,8 @@ class Tensor : public ITensor {
     void* data(const element::Type& type) override;
     const void* data() const override;
    const void* data(const element::Type& type) const override;
+    void* data_rw() override;
+    void* data_rw(const element::Type& type) override;
 
     MemoryPtr get_memory() {
         return m_memptr;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
index 2e5782fb008bb5..943b4014fcc438 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
@@ -126,9 +126,10 @@ static std::string getTestCaseName(const testing::TestParamInfodata())
-                                                         : cldnn::allocation_type::unknown;
+    auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data()) : cldnn::allocation_type::unknown;
     auto usm_host_raw_ptr = engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu &&
                             user_tensor_mem_type == cldnn::allocation_type::usm_host;
@@ -709,7 +708,7 @@ std::vector SyncInferRequest::prepare_batched_input(size_t in
         auto ptr = static_cast(merged_tensor->data());
         ov::parallel_for(user_tensors.size(), [&](size_t i) {
             const auto& tensor = user_tensors.at(i);
-            std::memcpy(ptr + i * tensor->get_byte_size(), static_cast(tensor->data()), tensor->get_byte_size());
+            std::memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
         });
     } else {
         const auto& stream = m_graph->get_network()->get_stream();
@@ -788,11 +787,12 @@ std::vector SyncInferRequest::prepare_input(const std::string
         }
     } else if (is_usm_host_tensor && !convert_needed) {
         if (element_type != ::data_type_for_remote_tensor(element_type)) {
-            m_plugin_inputs[input_idx] = { std::make_shared(m_context,
-                                                            user_tensor->get_shape(),
-                                                            ::data_type_for_remote_tensor(element_type),
-                                                            TensorType::BT_USM_SHARED,
-                                                            user_tensor->data()), TensorOwner::USER };
+            m_plugin_inputs[input_idx] = {std::make_shared(m_context,
+                                                           user_tensor->get_shape(),
+                                                           ::data_type_for_remote_tensor(element_type),
+                                                           TensorType::BT_USM_SHARED,
+                                                           user_tensor->data()),
+                                          TensorOwner::USER};
         } else {
             m_plugin_inputs[input_idx] = { usm_host_ptr->get_impl(), user_tensor_wrapper.owner };
         }
@@ -801,7 +801,7 @@ std::vector SyncInferRequest::prepare_input(const std::string
 
         auto user_tensor_mem_type = cldnn::allocation_type::unknown;
         if (!is_remote_tensor_impl && !is_generic_remote) {
-            user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor_wrapper.ptr->data());
+            user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor->data());
         }
 
         auto plugin_tensor_mem_type = cldnn::allocation_type::unknown;
@@ -876,7 +876,7 @@ std::vector SyncInferRequest::prepare_input(const std::string
             }
         } else {
             if (!is_remote_tensor_impl && !is_generic_remote) {
-                auto src_ptr = static_cast(user_tensor->data());
+                auto src_ptr = static_cast(user_tensor->data());
                 if (!same_host_mem(memory, src_ptr)) {
                     // WA: Set need_lockable_mem as a blocking argument
                     // The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations
diff --git a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp
index a439e10dd040e9..f991d0aee4c0e1 100644
--- a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp
+++ b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp
@@ -23,6 +23,14 @@ void* USMHostTensor::data(const element::Type&) {
     return m_impl->get_original_memory_buf_ptr();
 }
 
+void* USMHostTensor::data_rw() {
+    return m_impl->get_original_memory_buf_ptr();
+}
+
+void* USMHostTensor::data_rw(const element::Type&) {
+    return m_impl->get_original_memory_buf_ptr();
+}
+
 const void* USMHostTensor::data() const {
     return m_impl->get_original_memory_buf_ptr();
 }
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp
index d5aa024dd31ebf..9b260aa5f723f2 100644
--- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp
@@ -557,7 +557,8 @@ class KVCacheIssueTests: public ::testing::Test {
             {n_batch, context_size, n_heads, n_features}, -0.5f, 0.5f, 1);
         auto ireq1_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
             {n_batch, n_heads, context_size, context_size}, -0.5f, 0.5f, 1);
-        ireq1.set_tensor(input0, ireq1_input0);
+        // Create read-only tensor view to test inference with this const input data
+        ireq1.set_tensor(input0, {ireq1_input0.get_element_type(), ireq1_input0.get_shape(), std::as_const(ireq1_input0).data()});
         ireq1.set_tensor(input1, ireq1_input1);
 
         auto ireq2_input0 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
diff --git a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
index f09a7c32cb4643..42f00062af3e67 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
@@ -52,6 +52,9 @@ class ZeroTensor final : public ov::ITensor {
     void* data() override;
     void* data(const ov::element::Type& type) override;
 
+    void* data_rw() override;
+    void* data_rw(const ov::element::Type& type) override;
+
     const void* data() const override;
     const void* data(const ov::element::Type& type) const override;
 
diff --git a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
index f7782817bfdee0..9f81050c9971a1 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
@@ -100,6 +100,14 @@ void* ZeroTensor::data(const ov::element::Type& type) {
     return data();
 }
 
+void* ZeroTensor::data_rw() {
+    return data();
+}
+
+void* ZeroTensor::data_rw(const ov::element::Type& type) {
+    return data(type);
+}
+
 const void* ZeroTensor::data() const {
     return _ptr;
 }
diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp
index 01199426b6f77b..7baa67164a6bfa 100644
--- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp
+++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_host_tensor.hpp
@@ -22,6 +22,8 @@ class ZeroHostTensor : public ov::ITensor {
     void* data() override;
     void* data(const ov::element::Type& element_type) override;
+    void* data_rw() override;
+    void* data_rw(const ov::element::Type& element_type) override;
     const void* data() const override;
     const void* data(const ov::element::Type& element_type) const override;
     const ov::element::Type& get_element_type() const override;
diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp
index c82331ede6415b..db77d89e832eee 100644
--- a/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp
+++ b/src/plugins/intel_npu/src/utils/src/zero/zero_host_tensor.cpp
@@ -29,6 +29,14 @@ void* ZeroHostTensor::data(const ov::element::Type&) {
     return _impl->get_original_memory();
 }
 
+void* ZeroHostTensor::data_rw() {
+    return data();
+}
+
+void* ZeroHostTensor::data_rw(const ov::element::Type&) {
+    return data();
+}
+
 const void* ZeroHostTensor::data() const {
     return _impl->get_original_memory();
 }
diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
index eb6bc48db85c13..2cbfa2aefbb2f8 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/infer_request_run.hpp
@@ -336,6 +336,19 @@ TEST_P(InferRequestRunTests, RecreateL0TensorIfNeeded) {
     }
 }
 
+TEST_P(InferRequestRunTests, RunWithConstData) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+
+    ov::CompiledModel compiled_model;
+    ov::InferRequest inference_request;
+    OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, target_device, configuration));
+    OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
+    const auto tensor = inference_request.get_input_tensor();
+    const std::vector<float> data(tensor.get_byte_size() / sizeof(float));
+    OV_ASSERT_NO_THROW(inference_request.set_input_tensor({ov::element::f32, tensor.get_shape(), data.data()}));
+    OV_ASSERT_NO_THROW(inference_request.infer());
+}
+
 using RandomTensorOverZeroTensorRunTests = InferRequestRunTests;
 
 TEST_P(RandomTensorOverZeroTensorRunTests, SetRandomTensorOverZeroTensor0) {
diff --git a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp
index 4047d180999881..3abd13fa316d1c 100644
--- a/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp
+++ b/src/tests/functional/base_func_tests/src/behavior/ov_infer_request/batched_tensors.cpp
@@ -104,6 +104,33 @@ TEST_P(OVInferRequestBatchedTests, SetInputTensorsBase) {
     }
 }
 
+TEST_P(OVInferRequestBatchedTests, SetReadOnlyInputTensorsBase) {
+    size_t batch = 4;
+    auto one_shape = Shape{1, 2, 2, 2};
+    auto batch_shape = Shape{batch, 2, 2, 2};
+    auto one_shape_size = ov::shape_size(one_shape);
+    auto model = OVInferRequestBatchedTests::create_n_inputs(2, element::f32, batch_shape, "N...");
+    // Allocate 8 chunks, set 'user tensors' to 0, 2, 4, 6 chunks
+    const std::vector<float> buffer(one_shape_size * batch * 2, 5.f);
+    auto execNet = ie->compile_model(model, target_device);
+    // Create InferRequest
+    ov::InferRequest req;
+    req = execNet.create_infer_request();
+    std::vector<ov::Tensor> tensors;
+    for (auto i = 0; i < batch; ++i) {
+        // non contiguous memory (i*2)
+        auto tensor = ov::Tensor(element::f32, one_shape, &std::as_const(buffer)[(i * 2) * one_shape_size]);
+        tensors.push_back(std::move(tensor));
+    }
+    req.set_tensors("tensor_input0", tensors);
+    const auto actual_tensor = req.get_tensor("tensor_output0");
+    auto* actual = actual_tensor.data();
+    req.infer();  // Adds '1' to each element
+    for (auto j = 0; j < one_shape_size * batch; ++j) {
+        EXPECT_NEAR(actual[j], 6.f, 1e-5) << "Expected=6, actual=" << actual[j] << " for index " << j;
+    }
+}
+
 TEST_P(OVInferRequestBatchedTests, SetInputTensorsAsync) {
     size_t batch = 4;
     auto one_shape = Shape{1, 2, 2, 2};
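Taken together, the new tests above (RunWithConstData, SetReadOnlyInputTensorsBase, and the kv_cache change) show the user-facing intent of this diff: tensors backed by const host memory can be fed to an infer request, while mutable access is routed through the new data_rw() path. The sketch below is not part of the PR; it assumes the ov::Tensor constructor taking a const host pointer that the new tests rely on, a statically shaped model, and placeholder model/device names.

```cpp
#include <openvino/openvino.hpp>

#include <utility>
#include <vector>

int main() {
    ov::Core core;
    // "model.xml" and "CPU" are placeholders, not taken from this PR.
    auto compiled = core.compile_model("model.xml", "CPU");
    auto request = compiled.create_infer_request();

    const auto& input = compiled.input();
    // Const application data that inference must not modify.
    const std::vector<float> host_data(ov::shape_size(input.get_shape()), 0.0f);

    // Wrap the const buffer in a tensor view (no copy); this assumes the
    // read-only view constructor exercised by the new tests in this diff.
    ov::Tensor read_only_view(input.get_element_type(), input.get_shape(), host_data.data());

    // Const access stays available ...
    const void* ptr = std::as_const(read_only_view).data();
    (void)ptr;
    // ... while non-const Tensor::data() is now routed to ITensor::data_rw(),
    // which a read-only view rejects with an exception.

    request.set_input_tensor(read_only_view);
    request.infer();
    return 0;
}
```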
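The delegation in tensor.cpp (non-const Tensor::data() forwarding to ITensor::data_rw(), const Tensor::data() left unchanged) is the behavioural core of the change. A minimal sketch of what that split means for callers follows, assuming a regular host tensor that does allow mutable access; the helper name is illustrative, not an API from this PR.

```cpp
#include <openvino/runtime/tensor.hpp>

#include <iostream>
#include <vector>

// Illustrative helper (not part of this PR): read values without requesting
// mutable access, so it also works for tensors that only allow const access.
static const float* as_const_f32(const ov::Tensor& t) {
    // The const overload is not routed through data_rw() and therefore cannot
    // hit the "non-const access" exception that read-only views raise.
    return static_cast<const float*>(t.data(ov::element::f32));
}

int main() {
    std::vector<float> owned(8, 1.0f);
    ov::Tensor tensor(ov::element::f32, ov::Shape{8}, owned.data());

    // Non-const data() now forwards to ITensor::data_rw(); a regular view
    // tensor grants it, a read-only view would throw here instead.
    static_cast<float*>(tensor.data())[0] = 42.0f;

    std::cout << as_const_f32(tensor)[0] << std::endl;  // prints 42
    return 0;
}
```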