22 changes: 20 additions & 2 deletions src/core/dev_api/openvino/runtime/itensor.hpp
@@ -60,7 +60,15 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {

/**
* @brief Provides an access to the underlying host memory
* @param type Optional type parameter.
* @note The method throws an exception:
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
*/
virtual void* data_rw() = 0;

/**
* @brief Provides an access to the underlying host memory
* @param type Type parameter.
* @note The method throws an exception
* if specified type's fundamental type does not match with tensor element type's fundamental type
* @return A host pointer to tensor memory
@@ -70,10 +78,20 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this<ITensor> {
virtual const void* data(const element::Type& type) const = 0;
/// @}

/**
* @brief Provides an access to the underlying host memory
* @param type Type parameter.
* @note The method throws an exception:
* - if specified type's fundamental type does not match with tensor element type's fundamental type
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
*/
virtual void* data_rw(const element::Type& type) = 0;

/**
* @brief Provides an access to the underlying host memory casted to type `T`
* @return A host pointer to tensor memory casted to specified type `T`.
* @note Throws exception if specified type does not match with tensor element type
* @return A host pointer to tensor memory casted to specified type `T`.
*/
template <typename T, typename datatype = typename std::decay<T>::type>
T* data() {
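Note: the new accessor pair splits read and write access on `ov::ITensor`. The `const` `data()` overloads remain usable on any tensor, while `data_rw()` is the only path that hands out a mutable host pointer and throws when the implementation forbids non-const access (read-only views, remote tensors). Below is a minimal caller-side sketch of that contract; `copy_tensor_data` is an illustrative name, not part of this diff.

```cpp
#include <cstring>

#include "openvino/core/except.hpp"
#include "openvino/runtime/itensor.hpp"

// Reads go through the const data() overload, which any tensor provides;
// writes go through data_rw(), which throws if `dst` does not allow
// non-const access to its memory (e.g. a read-only view or a remote tensor).
void copy_tensor_data(const ov::ITensor& src, ov::ITensor& dst) {
    OPENVINO_ASSERT(src.get_byte_size() == dst.get_byte_size(), "byte size mismatch");
    std::memcpy(dst.data_rw(), src.data(), src.get_byte_size());
}
```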
7 changes: 5 additions & 2 deletions src/core/include/openvino/runtime/tensor.hpp
@@ -219,6 +219,8 @@ class OPENVINO_API Tensor {

/**
* @brief Provides an access to the underlying host memory
* @note The method throws an exception:
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
* @{
*/
@@ -229,8 +231,9 @@
/**
* @brief Provides an access to the underlying host memory
* @param type Optional type parameter.
* @note The method throws an exception
* if specified type's fundamental type does not match with tensor element type's fundamental type
* @note The method throws an exception:
* - if specified type's fundamental type does not match with tensor element type's fundamental type
* - if tensor implementation does not allow non-const access to memory.
* @return A host pointer to tensor memory
* @{
*/
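Note: the same contract surfaces on the public `ov::Tensor` — the `const` `data()` overloads always work for reading, while the non-const overloads (now forwarded to `data_rw()`, see tensor.cpp below) throw if the underlying tensor is read-only. A usage sketch, assuming the read-only view constructor over a `const` pointer that the updated tests further down rely on:

```cpp
#include <array>
#include <iostream>
#include <utility>

#include "openvino/core/except.hpp"
#include "openvino/runtime/tensor.hpp"

int main() {
    // Tensor that owns its memory: non-const access is allowed.
    ov::Tensor owned(ov::element::f32, ov::Shape{2, 2});
    owned.data<float>()[0] = 42.f;

    // Read-only view over external const data, as the updated tests create.
    const std::array<float, 4> external{1.f, 2.f, 3.f, 4.f};
    ov::Tensor view(ov::element::f32, ov::Shape{2, 2}, external.data());

    // Const access is always available...
    const float* ro = static_cast<const float*>(std::as_const(view).data());
    std::cout << ro[0] << "\n";

    // ...but requesting a mutable pointer from the read-only view throws.
    try {
        view.data();
    } catch (const ov::Exception& e) {
        std::cout << "non-const access rejected: " << e.what() << "\n";
    }
    return 0;
}
```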
26 changes: 13 additions & 13 deletions src/core/reference/src/op/einsum.cpp
@@ -580,9 +580,9 @@ void extract_diagonal(ov::TensorVector& inputs, std::vector<std::string>& input_
return;
}

ov::Tensor multi_identity = build_multi_identity<T>(input_ptr, repeated_labels, label_dim_map);
const ov::Tensor multi_identity = build_multi_identity<T>(input_ptr, repeated_labels, label_dim_map);

ov::Tensor mul_output = input_ptr;
auto mul_output = ov::Tensor(input_ptr.get_element_type(), input_ptr.get_shape());
reference::multiply<T>(input_ptr.data<T>(),
multi_identity.data<T>(),
mul_output.data<T>(),
@@ -926,17 +926,17 @@ void contract_two_inputs(ov::TensorVector& inputs,
reduced_sub_shape.get_shape(),
is_separate_first2);

ov::Tensor matmul_operand1 = reshape_input_for_matmul<T>(input1,
common_sub_shape,
separate1_sub_shape,
reduced_sub_shape_prod,
is_separate_first1);

ov::Tensor matmul_operand2 = reshape_input_for_matmul<T>(input2,
common_sub_shape,
separate2_sub_shape,
reduced_sub_shape_prod,
is_separate_first2);
const ov::Tensor matmul_operand1 = reshape_input_for_matmul<T>(input1,
common_sub_shape,
separate1_sub_shape,
reduced_sub_shape_prod,
is_separate_first1);

const ov::Tensor matmul_operand2 = reshape_input_for_matmul<T>(input2,
common_sub_shape,
separate2_sub_shape,
reduced_sub_shape_prod,
is_separate_first2);

// step 3. apply MatMul operation for formatted inputs
Shape matmul_output_shape = compute_matmul_output_shape(common_sub_shape, separate1_sub_shape, separate2_sub_shape);
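Note: `ov::Tensor mul_output = input_ptr;` only copied the handle — `ov::Tensor` is a shared, reference-counted wrapper — so the multiply used to write straight into the input's memory. With const-correct (possibly read-only) inputs that in-place write is no longer allowed, hence the freshly allocated output of the same element type and shape. A small sketch of the difference, with illustrative names:

```cpp
#include "openvino/runtime/tensor.hpp"

void alias_vs_allocate(const ov::Tensor& input) {
    // Copying the handle: `alias` shares memory with `input`, so writing through
    // it would mutate the (possibly read-only) input. This is what the old code did.
    ov::Tensor alias = input;

    // Allocating: a separate writable buffer of the same type and shape,
    // which is what the einsum reference now uses for the multiply result.
    ov::Tensor scratch(input.get_element_type(), input.get_shape());

    (void)alias;
    (void)scratch;
}
```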
19 changes: 10 additions & 9 deletions src/core/src/op/read_value.cpp
@@ -150,21 +150,22 @@ bool ReadValue::evaluate(TensorVector& outputs,

const auto use_context = var_value != variable_values.end() && !var_value->second->get_reset();
auto& output = outputs[0];
Tensor input;
if (use_context) {
input = var_value->second->get_state();
} else {
if (!inputs.empty()) {
input = inputs[0];
const auto& input = [&] {
if (use_context) {
return var_value->second->get_state();
} else if (!inputs.empty()) {
return inputs[0];
} else {
auto var_info = m_variable->get_info();
const auto var_info = m_variable->get_info();
OPENVINO_ASSERT(var_info.data_shape.is_static() && var_info.data_type.is_static());
const auto& shape = var_info.data_shape.get_shape();
const auto& type = var_info.data_type;
input = ov::Tensor(type, shape);
auto input = ov::Tensor(type, shape);
memset(input.data(), 0, input.get_byte_size());
return input;
}
}
}();

output.set_shape(input.get_shape());
std::memcpy(output.data(), input.data(), output.get_byte_size());
return true;
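Note: the rewrite replaces the default-constructed `Tensor input;` plus branchy assignments with an immediately-invoked lambda, so `input` is initialized exactly once from whichever source applies (variable state, the optional input, or a zero-filled fallback) and can stay bound as a const reference. The pattern in isolation, with illustrative names and shapes:

```cpp
#include <cstring>

#include "openvino/runtime/tensor.hpp"

ov::Tensor pick_source(bool use_existing, const ov::Tensor& existing) {
    // Immediately-invoked lambda: several possible sources, a single const binding.
    const auto& input = [&]() -> ov::Tensor {
        if (use_existing) {
            return existing;  // reuse the tensor that is already available
        }
        // Fallback: freshly allocated tensor, zero-filled because a new
        // ov::Tensor does not guarantee initialized memory.
        ov::Tensor zeroed(ov::element::f32, ov::Shape{2, 2});
        std::memset(zeroed.data(), 0, zeroed.get_byte_size());
        return zeroed;
    }();
    return input;
}
```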
4 changes: 2 additions & 2 deletions src/core/src/runtime/tensor.cpp
@@ -106,15 +106,15 @@ size_t Tensor::get_byte_size() const {
}

void* Tensor::data() {
OV_TENSOR_STATEMENT(return _impl->data());
OV_TENSOR_STATEMENT(return _impl->data_rw());
}

const void* Tensor::data() const {
OV_TENSOR_STATEMENT(return std::as_const(*_impl).data());
}

void* Tensor::data(const element::Type& element_type) {
OV_TENSOR_STATEMENT(return _impl->data(element_type));
OV_TENSOR_STATEMENT(return _impl->data_rw(element_type));
}

const void* Tensor::data(const element::Type& element_type) const {
8 changes: 8 additions & 0 deletions src/inference/dev_api/openvino/runtime/iremote_tensor.hpp
@@ -32,6 +32,14 @@ class OPENVINO_RUNTIME_API IRemoteTensor : public ITensor {
OPENVINO_NOT_IMPLEMENTED;
}

void* data_rw() override final {
OPENVINO_NOT_IMPLEMENTED;
}

void* data_rw(const element::Type&) override final {
OPENVINO_NOT_IMPLEMENTED;
}

~IRemoteTensor() override;

/**
8 changes: 8 additions & 0 deletions src/inference/dev_api/openvino/runtime/so_ptr.hpp
@@ -75,6 +75,14 @@ struct SoPtr {
return _ptr.get();
}

/**
* @brief Dereference stored pointer to T object.
* @return Reference to T object.
*/
T& operator*() const noexcept {
return *_ptr;
}

explicit operator bool() const noexcept {
return _ptr != nullptr;
}
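Note: `SoPtr::operator->` hands back a plain `T*`, so calls through it always resolve to the non-const overloads. The new `operator*` lets callers dereference the wrapped object directly and, for example, combine it with `std::as_const` when only read access is needed. A minimal sketch with an illustrative helper name:

```cpp
#include <utility>

#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"

// Read-only access through an SoPtr-wrapped tensor: dereference, then add const,
// so the call resolves to the const data() overload.
const void* read_only_ptr(const ov::SoPtr<ov::ITensor>& tensor) {
    return std::as_const(*tensor).data();
}
```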
2 changes: 1 addition & 1 deletion src/inference/src/dev/isync_infer_request.cpp
@@ -192,7 +192,7 @@ void ov::ISyncInferRequest::convert_batched_tensors() {
// Perform memory copy
ov::parallel_for(item.second.size(), [&](size_t i) {
const auto& tensor = item.second.at(i);
memcpy(ptr + i * tensor->get_byte_size(), static_cast<uint8_t*>(tensor->data()), tensor->get_byte_size());
memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
});
prepared_tensors[item.first] = input_tensor;
}
31 changes: 28 additions & 3 deletions src/inference/src/dev/make_tensor.cpp
@@ -84,6 +84,19 @@ class ViewTensor : public ITensor {
return m_ptr;
}

void* data_rw() override {
return m_ptr;
}

void* data_rw(const element::Type& element_type) override {
OPENVINO_ASSERT(is_pointer_representable(element_type),
"Tensor data with element type ",
get_element_type(),
", is not representable as pointer to ",
element_type);
return m_ptr;
}

const element::Type& get_element_type() const override {
return m_element_type;
}
@@ -157,11 +170,11 @@ class ReadOnlyViewTensor : public ViewTensor {

using ViewTensor::data;

[[noreturn]] void* data() override {
[[noreturn]] void* data_rw() override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}

[[noreturn]] void* data(const element::Type& element_type) override {
[[noreturn]] void* data_rw(const element::Type& element_type) override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data(element_type)'");
}
};
@@ -231,7 +244,11 @@ class ReadOnlyStridedViewTensor : public StridedViewTensor {

using StridedViewTensor::data;

[[noreturn]] void* data(const element::Type& element_type) override {
[[noreturn]] void* data_rw() override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}

[[noreturn]] void* data_rw(const element::Type& element_type) override {
OPENVINO_THROW("Can not access non-const pointer use e.g. 'static_cast<const ov::Tensor&>.data()'");
}
};
@@ -463,6 +480,14 @@ class RoiTensor : public BaseRoiTensor, public ITensor {
const void* data(const element::Type& element_type) const override {
return static_cast<uint8_t*>(m_owner->data()) + m_offset;
}

void* data_rw() override {
return static_cast<uint8_t*>(m_owner->data_rw()) + m_offset;
}

void* data_rw(const element::Type& element_type) override {
return static_cast<uint8_t*>(m_owner->data_rw(element_type)) + m_offset;
}
};

/**
13 changes: 13 additions & 0 deletions src/plugins/intel_cpu/src/cpu_tensor.cpp
@@ -125,6 +125,19 @@ const void* Tensor::data(const element::Type& element_type) const {
return m_memptr->getData();
}

void* Tensor::data_rw() {
return m_memptr->getData();
}

void* Tensor::data_rw(const element::Type& element_type) {
OPENVINO_ASSERT(is_pointer_representable(get_element_type(), element_type),
"Tensor data with element type ",
get_element_type(),
", is not representable as pointer to ",
element_type);
return m_memptr->getData();
}

/**
* @brief Creates tensor on graph memory
*
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/cpu_tensor.h
@@ -37,6 +37,8 @@ class Tensor : public ITensor {
void* data(const element::Type& type) override;
const void* data() const override;
const void* data(const element::Type& type) const override;
void* data_rw() override;
void* data_rw(const element::Type& type) override;

MemoryPtr get_memory() {
return m_memptr;
@@ -126,9 +126,10 @@ static std::string getTestCaseName(const testing::TestParamInfo<LoraPatternParam
ASSERT_TRUE(inferRequestRef);

generate_inputs(targetStaticShapes.front());
for (const auto& input : inputs) {
inferRequest.set_tensor(input.first, input.second);
inferRequestRef.set_tensor(input.first, input.second);
for (const auto& [port, tensor] : inputs) {
// Use read-only tensors as inputs, created from `const void*`
inferRequest.set_tensor(port, {tensor.get_element_type(), tensor.get_shape(), tensor.data()});
inferRequestRef.set_tensor(port, {tensor.get_element_type(), tensor.get_shape(), tensor.data()});
}

constexpr size_t lora_order = 25lu;
@@ -22,6 +22,8 @@ class USMHostTensor : public ov::ITensor {

void* data() override;
void* data(const element::Type& element_type) override;
void* data_rw() override;
void* data_rw(const element::Type& element_type) override;
const void* data() const override;
const void* data(const element::Type& element_type) const override;

20 changes: 10 additions & 10 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -533,8 +533,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe

// Note: currently, using USM Host memory for dGPUs in some scenarios (LLMs) leads to performance degradation,
// so apply wider USM Host memory type detection only for iGPUs
auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data())
: cldnn::allocation_type::unknown;
auto user_tensor_mem_type = !generic_remote_tensor ? engine.detect_usm_allocation_type(user_tensor->data()) : cldnn::allocation_type::unknown;
auto usm_host_raw_ptr = engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu &&
user_tensor_mem_type == cldnn::allocation_type::usm_host;

@@ -709,7 +708,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_batched_input(size_t in
auto ptr = static_cast<uint8_t*>(merged_tensor->data());
ov::parallel_for(user_tensors.size(), [&](size_t i) {
const auto& tensor = user_tensors.at(i);
std::memcpy(ptr + i * tensor->get_byte_size(), static_cast<uint8_t*>(tensor->data()), tensor->get_byte_size());
std::memcpy(ptr + i * tensor->get_byte_size(), tensor->data(), tensor->get_byte_size());
});
} else {
const auto& stream = m_graph->get_network()->get_stream();
@@ -788,11 +787,12 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
}
} else if (is_usm_host_tensor && !convert_needed) {
if (element_type != ::data_type_for_remote_tensor(element_type)) {
m_plugin_inputs[input_idx] = { std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()), TensorOwner::USER };
m_plugin_inputs[input_idx] = {std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()),
TensorOwner::USER};
} else {
m_plugin_inputs[input_idx] = { usm_host_ptr->get_impl(), user_tensor_wrapper.owner };
}
@@ -801,7 +801,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string

auto user_tensor_mem_type = cldnn::allocation_type::unknown;
if (!is_remote_tensor_impl && !is_generic_remote) {
user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor_wrapper.ptr->data());
user_tensor_mem_type = engine.detect_usm_allocation_type(user_tensor->data());
}

auto plugin_tensor_mem_type = cldnn::allocation_type::unknown;
@@ -876,7 +876,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
}
} else {
if (!is_remote_tensor_impl && !is_generic_remote) {
auto src_ptr = static_cast<uint8_t*>(user_tensor->data());
auto src_ptr = static_cast<const uint8_t*>(user_tensor->data());
if (!same_host_mem(memory, src_ptr)) {
// WA: Set need_lockable_mem as a blocking argument
// The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations
8 changes: 8 additions & 0 deletions src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp
@@ -23,6 +23,14 @@ void* USMHostTensor::data(const element::Type&) {
return m_impl->get_original_memory_buf_ptr();
}

void* USMHostTensor::data_rw() {
return m_impl->get_original_memory_buf_ptr();
}

void* USMHostTensor::data_rw(const element::Type&) {
return m_impl->get_original_memory_buf_ptr();
}

const void* USMHostTensor::data() const {
return m_impl->get_original_memory_buf_ptr();
}
@@ -557,7 +557,8 @@ class KVCacheIssueTests: public ::testing::Test {
{n_batch, context_size, n_heads, n_features}, -0.5f, 0.5f, 1);
auto ireq1_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
{n_batch, n_heads, context_size, context_size}, -0.5f, 0.5f, 1);
ireq1.set_tensor(input0, ireq1_input0);
// Create read-only tensor view to test inference with this const input data
ireq1.set_tensor(input0, {ireq1_input0.get_element_type(), ireq1_input0.get_shape(), std::as_const(ireq1_input0).data()});
ireq1.set_tensor(input1, ireq1_input1);

auto ireq2_input0 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type,
3 changes: 3 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
@@ -52,6 +52,9 @@ class ZeroTensor final : public ov::ITensor {
void* data() override;
void* data(const ov::element::Type& type) override;

void* data_rw() override;
void* data_rw(const ov::element::Type& type) override;

const void* data() const override;
const void* data(const ov::element::Type& type) const override;
