Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class ZeroInferRequest final : public SyncInferRequest {
void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
void set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;
void set_tensors(const std::map<ov::Output<const ov::Node>, ov::SoPtr<ov::ITensor>>& ports_tensors);

void infer() override;
void infer_async() override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct Pipeline final {
void reset() const;

void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor);
void update_graph_arguments(const std::vector<std::pair<uint32_t, std::shared_ptr<ZeroTensor>>>& tensors);
void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor, size_t batch_index);

std::vector<ov::ProfilingInfo> get_profiling_info() const;
Expand Down
141 changes: 141 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,147 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
// If command list updates are not supported, fallback to copying tensors every time.
}

/// Sets several input/output tensors at once and, when the mutable-command-list extension is
/// available, updates the pipeline's graph arguments with a single batched driver call.
/// @param ports_tensors map of model port -> user tensor to bind to that port.
/// @throws ov::Exception when a port cannot be found, a tensor fails validation, or a dynamic
///         batch candidate conflicts with the batch size already chosen for this request.
void ZeroInferRequest::set_tensors(const std::map<ov::Output<const ov::Node>, ov::SoPtr<ov::ITensor>>& ports_tensors) {
    OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensors", "set_tensors");
    bool updateCommandListArg = false;
    // (driver argument index, Level Zero tensor) pairs collected so the mutable command list can be
    // updated once, after all ports were processed. Built incrementally (reserve + emplace_back)
    // instead of pre-sized: the original pre-sized vector could hand {0, nullptr} placeholders to
    // update_graph_arguments whenever a port was skipped.
    std::vector<std::pair<uint32_t, std::shared_ptr<ZeroTensor>>> zeroTensors;
    zeroTensors.reserve(ports_tensors.size());
    for (const auto& [port, tensor] : ports_tensors) {
        auto foundPort = find_port(port);
        OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port);
        try {
            check_tensor(port,
                         tensor,
                         foundPort.is_input() ? _metadata.inputs.at(foundPort.idx).supportsStridedLayout
                                              : _metadata.outputs.at(foundPort.idx).supportsStridedLayout);
        } catch (const ov::Exception& ex) {
            OPENVINO_THROW("Failed to set tensor. ", ex.what());
        }

        if (foundPort.is_input()) {
            if (get_user_input(foundPort.idx)._ptr == tensor._ptr) {
                // Fix: "continue" instead of "return" - a single unchanged tensor must not abort
                // processing of the remaining ports in the map (nor skip the final batched update).
                _logger.debug("ZeroInferRequest::set_tensor - got the same tensor, do nothing");
                continue;
            }

            const auto& ioShape = _compiledModel->inputs()[foundPort.idx].get_partial_shape();
            auto batchSizeCandidate =
                determine_dynamic_batch_size(_metadata.inputs.at(foundPort.idx), ioShape, tensor._ptr, std::nullopt);

            if (batchSizeCandidate.has_value()) {
                if (!_dynamicBatchValueChanged) {
                    // A byte-size change versus the previously bound tensor(s) signals that the
                    // dynamic batch value changed for this request.
                    if (get_user_input(foundPort.idx)._ptr != nullptr &&
                        get_user_input(foundPort.idx)->get_byte_size() * get_user_inputs(foundPort.idx).size() !=
                            tensor->get_byte_size()) {
                        _dynamicBatchValueChanged = true;
                        _graph->set_batch_size(batchSizeCandidate.value());
                    } else if (_graph->get_batch_size().has_value()) {
                        if (batchSizeCandidate.value() != _graph->get_batch_size().value()) {
                            _dynamicBatchValueChanged = true;
                            _graph->set_batch_size(batchSizeCandidate.value());
                        }
                    } else {
                        _graph->set_batch_size(batchSizeCandidate.value());
                    }
                } else if (batchSizeCandidate.value() != _graph->get_batch_size().value()) {
                    OPENVINO_THROW("Batching size is not matching all the tensors.");
                }
            }

            if (is_batched_input(foundPort.idx)) {
                // Reset vector size to 1 if set_tensor is called after set_tensors
                get_level_zero_inputs(foundPort.idx).resize(1);
                get_level_zero_inputs(foundPort.idx).shrink_to_fit();
                get_level_zero_input(foundPort.idx) = {};
                get_user_inputs(foundPort.idx).resize(1);
                get_user_inputs(foundPort.idx).shrink_to_fit();
                get_user_input(foundPort.idx) = {};
            }

            get_user_input(foundPort.idx) = tensor;
        } else {
            if (_userOutputTensors.at(foundPort.idx)._ptr == tensor._ptr) {
                // Fix: "continue" instead of "return" (see the input branch above).
                _logger.debug("ZeroInferRequest::set_tensor - got the same tensor, do nothing");
                continue;
            }

            const auto& ioShape = _compiledModel->outputs()[foundPort.idx].get_partial_shape();
            auto batchSizeCandidate =
                determine_dynamic_batch_size(_metadata.outputs.at(foundPort.idx), ioShape, tensor._ptr, std::nullopt);

            if (batchSizeCandidate.has_value()) {
                if (!_dynamicBatchValueChanged) {
                    if (_userOutputTensors.at(foundPort.idx)._ptr != nullptr &&
                        _userOutputTensors.at(foundPort.idx)->get_byte_size() != tensor->get_byte_size()) {
                        _dynamicBatchValueChanged = true;
                        _graph->set_batch_size(batchSizeCandidate.value());
                    } else if (_graph->get_batch_size().has_value()) {
                        if (batchSizeCandidate.value() != _graph->get_batch_size().value()) {
                            _dynamicBatchValueChanged = true;
                            _graph->set_batch_size(batchSizeCandidate.value());
                        }
                    } else {
                        _graph->set_batch_size(batchSizeCandidate.value());
                    }
                } else if (batchSizeCandidate.value() != _graph->get_batch_size().value()) {
                    OPENVINO_THROW("Batching size is not matching all the tensors.");
                }
            }

            _userOutputTensors.at(foundPort.idx) = tensor;
        }

        if (_initStructs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) {
            auto& levelZeroTensor =
                foundPort.is_input() ? get_level_zero_input(foundPort.idx) : _levelZeroOutputTensors.at(foundPort.idx);

            try {
                _logger.debug("ZeroInferRequest::set_tensor - create zero tensor");
                OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "create zero tensor");
                // Try to use the user tensor directly if its underlying data is already allocated in the same Level
                // Zero context.
                levelZeroTensor = std::make_shared<ZeroTensor>(_initStructs, _config, tensor);
                updateCommandListArg = true;
            } catch (const ZeroMemException& exception) {
                _logger.debug(
                    "ZeroInferRequest::set_tensor - exception caught while trying to create a Level Zero tensor "
                    "from the user tensor: %s",
                    exception.what());

                // Check if the current Level Zero tensor was previously shared with the user. If so, it cannot be
                // reused; allocate a new tensor to back up the user tensor (which cannot be imported or used directly).
                if (_dynamicBatchValueChanged || levelZeroTensor == nullptr || !levelZeroTensor->can_be_reused()) {
                    _logger.debug("ZeroInferRequest::set_tensor - allocate locally L0 tensor");
                    OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");

                    auto batch = _graph->get_batch_size();
                    levelZeroTensor = allocate_tensor(foundPort.idx, foundPort.is_input(), batch);
                    updateCommandListArg = true;
                } else {
                    _logger.debug("ZeroInferRequest::set_tensor - reusing the level zero tensor since it is not shared "
                                  "with the user");
                }
            }
            if (_pipelineIsCreated && updateCommandListArg && !_dynamicBatchValueChanged) {
                OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer");
            }
            // Only ports that actually changed contribute an entry; no placeholder pairs remain.
            zeroTensors.emplace_back(foundPort.is_input() ? _metadata.inputs.at(foundPort.idx).indexUsedByDriver
                                                          : _metadata.outputs.at(foundPort.idx).indexUsedByDriver,
                                     levelZeroTensor);
        }
        // If command list updates are not supported, fallback to copying tensors every time.
    }
    if (_pipelineIsCreated && updateCommandListArg && !_dynamicBatchValueChanged) {
        // Fix: this log message previously said "infer_async" (copy/paste error).
        _logger.debug("ZeroInferRequest::set_tensors - update command list");

        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "update_graph_arguments");
        _pipeline->update_graph_arguments(zeroTensors);
    }
}

ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor");

Expand Down
35 changes: 35 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,41 @@ void Pipeline::update_graph_arguments(uint32_t index, const std::shared_ptr<Zero
}
};

void Pipeline::update_graph_arguments(const std::vector<std::pair<uint32_t, std::shared_ptr<ZeroTensor>>>& tensors) {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList");
_logger.debug("Pipeline - updateCommandList");

const size_t number_of_command_lists = _command_lists.size();

for (size_t i = 0; i < number_of_command_lists; i++) {
std::vector<std::pair<ze_mutable_graph_argument_exp_desc_t, std::optional<ze_graph_argument_value_strides_t>>>
descs;
descs.reserve(tensors.size());
for (const auto& [index, tensor] : tensors) {
if (tensor->get_element_type().bitwidth() < 8 || tensor->is_continuous() || tensor->get_strides().empty()) {
_command_lists.at(i)->updateMutableCommandList(
index,
static_cast<const unsigned char*>(tensor->data()) +
(i * tensor->get_byte_size()) / number_of_command_lists,
descs);
} else {
_command_lists.at(i)->updateMutableCommandListWithStrides(
index,
static_cast<const unsigned char*>(tensor->data()) + (i * tensor->get_strides()[0]),
get_strides(tensor->get_strides(), tensor->get_element_type().size()),
descs);
}
}
ze_mutable_commands_exp_desc_t mutable_commands_exp_desc_t = {ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC,
&descs.at(0).first,
0};

THROW_ON_FAIL_FOR_LEVELZERO(
"zeCommandListUpdateMutableCommandsExp",
zeCommandListUpdateMutableCommandsExp(_command_lists.at(i)->handle(), &mutable_commands_exp_desc_t));
}
};

void Pipeline::update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor, size_t batch_index) {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandListIndex");
_logger.debug("Pipeline - updateCommandListIndex");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,20 @@ class CommandList {
void appendGraphExecute(const ze_graph_handle_t& graph_handle,
const ze_graph_profiling_query_handle_t& profiling_query_handle) const;
void updateMutableCommandList(uint32_t index, const void* data) const;
void updateMutableCommandList(
uint32_t index,
const void* data,
std::vector<std::pair<ze_mutable_graph_argument_exp_desc_t, std::optional<ze_graph_argument_value_strides_t>>>&
descs) const;
void updateMutableCommandListWithStrides(uint32_t index,
const void* data,
const std::vector<size_t>& strides) const;
void updateMutableCommandListWithStrides(
uint32_t index,
const void* data,
const std::vector<size_t>& strides,
std::vector<std::pair<ze_mutable_graph_argument_exp_desc_t, std::optional<ze_graph_argument_value_strides_t>>>&
descs) const;
void appendNpuTimestamp(uint64_t* timestamp_buff) const;
void appendBarrier() const;
void close() const;
Expand Down
67 changes: 67 additions & 0 deletions src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@ void CommandList::updateMutableCommandList(uint32_t index, const void* data) con
THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListUpdateMutableCommandsExp",
zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t));
}
// Appends a mutable graph-argument descriptor for (index, data) to `descs` and links it into the
// pNext chain formed by the previously appended descriptors. The actual driver call is issued later
// by the caller with the head of the chain.
// NOTE(review): the chain stores raw pointers into `descs`; the caller must reserve() enough
// capacity up front so this insert never reallocates the vector, otherwise every previously linked
// pNext pointer dangles — confirm all call sites do this.
void CommandList::updateMutableCommandList(
    uint32_t index,
    const void* data,
    std::vector<std::pair<ze_mutable_graph_argument_exp_desc_t, std::optional<ze_graph_argument_value_strides_t>>>&
        descs) const {
    auto desc = ze_mutable_graph_argument_exp_desc_t{};
    // Older drivers (< 1.11) only understand the deprecated structure-type value.
    desc.stype = (_init_structs->getZeDrvApiVersion() >= ZE_MAKE_VERSION(1, 11))
                     ? ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC
                     : static_cast<ze_structure_type_t>(ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC_DEPRECATED);
    desc.commandId = _command_id;
    desc.argIndex = index;
    desc.pArgValue = data;
    auto it = descs.insert(descs.end(), {desc, std::nullopt});

    // Link the new descriptor after the previous chain tail: either the previous descriptor's
    // strides struct (when present) or the previous descriptor itself.
    if (it > descs.begin()) {
        auto prev_elm = std::prev(it);
        if (prev_elm->second != std::nullopt) {
            prev_elm->second->pNext = &it->first;
        } else {
            prev_elm->first.pNext = &it->first;
        }
    }
}
void CommandList::updateMutableCommandListWithStrides(uint32_t index,
const void* data,
const std::vector<size_t>& strides) const {
Expand Down Expand Up @@ -189,6 +212,50 @@ void CommandList::updateMutableCommandListWithStrides(uint32_t index,
zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t));
}

void CommandList::updateMutableCommandListWithStrides(
uint32_t index,
const void* data,
const std::vector<size_t>& strides,
std::vector<std::pair<ze_mutable_graph_argument_exp_desc_t, std::optional<ze_graph_argument_value_strides_t>>>&
descs) const {
auto desc = ze_mutable_graph_argument_exp_desc_t{};
desc.commandId = _command_id;
desc.argIndex = index;
desc.pArgValue = data;
auto it = descs.insert(descs.end(), {desc, std::nullopt});

if (!strides.empty()) {
if (_init_structs->getGraphDdiTable().version() < ZE_MAKE_VERSION(1, 15)) {
OPENVINO_THROW("Strides are not supported by the current driver version.");
}

if (strides.size() > ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) {
OPENVINO_THROW("The driver does not support strides with more than",
ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE,
"dimensions.");
}

it->second = ze_graph_argument_value_strides_t{};
it->second->stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_STRIDES;
for (size_t i = 0; i < strides.size(); ++i) {
if (strides[i] > std::numeric_limits<uint32_t>::max()) {
OPENVINO_THROW("Stride value exceeds uint32_t range supported by the driver");
}
it->second->userStrides[i] = static_cast<uint32_t>(strides[i]);
}

it->first.pNext = &it->second;
}

if (it > descs.begin()) {
auto prev_elm = std::prev(it);
if (prev_elm->second != std::nullopt) {
prev_elm->second->pNext = &it->first;
} else {
prev_elm->first.pNext = &it->first;
}
}
}
CommandQueue::CommandQueue(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ze_command_queue_priority_t& priority,
const uint32_t group_ordinal,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "internal/utils/zero/zero_wrappers.hpp"

#include "common/npu_test_env_cfg.hpp"
#include "common/utils.hpp"
#include "intel_npu/config/options.hpp"
#include "intel_npu/npu_private_properties.hpp"

using namespace ov::test::behavior;

// Instantiates the Level Zero command-list behavior test suite for the NPU device target.
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest,
                         ZeroCommandListsTests,
                         ::testing::Values(ov::test::utils::DEVICE_NPU),
                         ZeroCommandListsTests::getTestCaseName);
Loading