Skip to content

Commit 35ce383

Browse files
committed
Adding support for ZE_GRAPH_PROPERTIES_FLAG_NO_STANDARD_ALLOCATION
Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
1 parent b84ccf8 commit 35ce383

File tree

6 files changed

+146
-93
lines changed

6 files changed

+146
-93
lines changed

src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class ZeGraphExtWrappers {
5757

5858
void initializeGraph(const GraphDescriptor& graphDescriptor, uint32_t commandQueueGroupOrdinal) const;
5959

60+
bool isBlobDataImported(const GraphDescriptor& graphDescriptor) const;
61+
6062
private:
6163
void getMetadata(ze_graph_handle_t graphHandle,
6264
uint32_t index,
@@ -65,7 +67,7 @@ class ZeGraphExtWrappers {
6567

6668
void initializeGraphThroughCommandList(ze_graph_handle_t graphHandle, uint32_t commandQueueGroupOrdinal) const;
6769

68-
bool canCpuVaBeImported(void* data, size_t size, const uint32_t flags = 0) const;
70+
bool canCpuVaBeImported(void* data, size_t size) const;
6971

7072
std::shared_ptr<ZeroInitStructsHolder> _zeroInitStruct;
7173
uint32_t _graphExtVersion;

src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,9 @@ void Graph::initialize(const Config& config) {
243243
}
244244

245245
bool Graph::release_blob(const Config& config) {
246-
if (_graphDesc._memoryPersistent || _blobIsPersistent || _blob == std::nullopt ||
247-
_zeroInitStruct->getGraphDdiTable().version() < ZE_MAKE_VERSION(1, 8) || config.get<PERF_COUNT>()) {
246+
if ((_zeGraphExt != nullptr && _zeGraphExt->isBlobDataImported(_graphDesc)) || _blobIsPersistent ||
247+
_blob == std::nullopt || _zeroInitStruct->getGraphDdiTable().version() < ZE_MAKE_VERSION(1, 8) ||
248+
config.get<PERF_COUNT>()) {
248249
return false;
249250
}
250251

src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,8 @@ void WeightlessGraph::set_weights_inputs() {
600600
}
601601

602602
void WeightlessGraph::release_init_blob(const size_t initIndex) {
603-
if (_initsGraphDesc.at(initIndex)._memoryPersistent || _blobIsPersistent || _initBlobs == std::nullopt ||
604-
_zeroInitStruct->getGraphDdiTable().version() < ZE_MAKE_VERSION(1, 8)) {
603+
if ((_zeGraphExt != nullptr && _zeGraphExt->isBlobDataImported(_graphDesc)) || _blobIsPersistent ||
604+
_initBlobs == std::nullopt || _zeroInitStruct->getGraphDdiTable().version() < ZE_MAKE_VERSION(1, 8)) {
605605
return;
606606
}
607607

src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp

Lines changed: 110 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,99 @@
2424

2525
#define UseCopyForNativeBinary(T) (T < ZE_GRAPH_EXT_VERSION_1_7)
2626

27+
namespace {
28+
using namespace intel_npu;
29+
/**
30+
* @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific
31+
* ones.
32+
*
33+
* @param arg The main Level Zero structure from which most metadata will be extracted.
34+
* @param metadata The secondary Level Zero structure from which metadata will be extracted. More specifically, the
35+
* argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for
36+
* the referenced attribute.
37+
* @returns A descriptor object containing the metadata converted into OpenVINO specific structures.
38+
*/
39+
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
40+
const std::optional<ze_graph_argument_metadata_t>& metadata) {
41+
auto logger = Logger::global().clone("getIODescriptor");
42+
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
43+
ov::Shape shapeFromCompiler;
44+
ov::PartialShape shapeFromIRModel;
45+
std::unordered_set<std::string> outputTensorNames;
46+
47+
for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) {
48+
outputTensorNames.insert(arg.associated_tensor_names[id]);
49+
}
50+
for (uint32_t id = 0; id < arg.dims_count; id++) {
51+
shapeFromCompiler.push_back(arg.dims[id]);
52+
}
53+
if (metadata.has_value()) {
54+
const auto dynamicDim = std::numeric_limits<uint64_t>::max();
55+
shapeFromIRModel.reserve(metadata->shape_size);
56+
for (uint32_t id = 0; id < metadata->shape_size; id++) {
57+
if (metadata->shape[id] != dynamicDim) {
58+
shapeFromIRModel.push_back(metadata->shape[id]);
59+
} else {
60+
// lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
61+
// upper bound is set to the value from shapeFromCompiler as it is filled with upper bounds
62+
// in case of dynamic dimensions
63+
if (id == utils::BATCH_AXIS && shapeFromCompiler[id] == utils::DEFAULT_BATCH_SIZE) {
64+
logger.info("Ignore dynamic batch size upper limit, but keep the dimension dynamic as a metadata "
65+
"from compiler has been lost.");
66+
// We need to keep the batch dimension dynamic
67+
shapeFromIRModel.push_back(ov::Dimension(1, dynamicDim));
68+
} else {
69+
shapeFromIRModel.push_back(ov::Dimension(1, shapeFromCompiler[id]));
70+
}
71+
}
72+
}
73+
}
74+
75+
// Flags will be used instead of indices for informing the type of the current entry
76+
std::string nameFromCompiler = arg.name;
77+
const bool isInput = (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT);
78+
bool isStateInput = false;
79+
bool isStateOutput = false;
80+
bool isShapeTensor = false;
81+
bool isInitInputWeights = false;
82+
bool isInitOutputWeights = false;
83+
bool isMainInputWeights = false;
84+
if (isInput && isStateInputName(nameFromCompiler)) {
85+
nameFromCompiler = nameFromCompiler.substr(READVALUE_PREFIX.length());
86+
isStateInput = true;
87+
} else if (!isInput && isStateOutputName(nameFromCompiler)) {
88+
nameFromCompiler = nameFromCompiler.substr(ASSIGN_PREFIX.length());
89+
isStateOutput = true;
90+
} else if (isShapeTensorName(nameFromCompiler)) {
91+
nameFromCompiler = nameFromCompiler.substr(SHAPE_TENSOR_PREFIX.length());
92+
isShapeTensor = true;
93+
} else if (isInput && isInitInputWeightsName(nameFromCompiler)) {
94+
nameFromCompiler = nameFromCompiler.substr(INIT_INPUT_WEIGHTS_PREFIX.length());
95+
isInitInputWeights = true;
96+
} else if (!isInput && isInitOutputWeightsName(nameFromCompiler)) {
97+
nameFromCompiler = nameFromCompiler.substr(INIT_OUTPUT_WEIGHTS_PREFIX.length());
98+
isInitOutputWeights = true;
99+
} else if (isInput && isMainInputWeightsName(nameFromCompiler)) {
100+
nameFromCompiler = nameFromCompiler.substr(MAIN_INPUT_WEIGHTS_PREFIX.length());
101+
isMainInputWeights = true;
102+
}
103+
104+
return {std::move(nameFromCompiler),
105+
precision,
106+
shapeFromCompiler,
107+
isStateInput,
108+
isStateOutput,
109+
isShapeTensor,
110+
isInitInputWeights,
111+
isInitOutputWeights,
112+
isMainInputWeights,
113+
std::nullopt,
114+
arg.debug_friendly_name,
115+
std::move(outputTensorNames),
116+
metadata.has_value() ? std::optional(shapeFromIRModel) : std::nullopt};
117+
}
118+
} // namespace
119+
27120
namespace intel_npu {
28121

29122
GraphDescriptor::GraphDescriptor(ze_graph_handle_t handle, bool memoryPersistent)
@@ -225,7 +318,7 @@ std::unordered_set<std::string> ZeGraphExtWrappers::queryGraph(SerializedIR seri
225318
return parseQueryResult(supportedLayers);
226319
}
227320

228-
bool ZeGraphExtWrappers::canCpuVaBeImported(void* data, size_t size, const uint32_t flags) const {
321+
bool ZeGraphExtWrappers::canCpuVaBeImported(void* data, size_t size) const {
229322
if (_graphExtVersion < ZE_MAKE_VERSION(1, 13) ||
230323
!utils::memory_and_size_aligned_to_standard_page_size(data, size)) {
231324
return false;
@@ -275,8 +368,11 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
275368
}
276369

277370
uint32_t flags = 0;
278-
279-
bool setPersistentFlag = canCpuVaBeImported(blobData, blobSize, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
371+
bool setPersistentFlag = true;
372+
if (_graphExtVersion < ZE_MAKE_VERSION(1, 14)) {
373+
// special case for ext version 1.13
374+
setPersistentFlag = canCpuVaBeImported(blobData, blobSize);
375+
}
280376

281377
if (setPersistentFlag) {
282378
_logger.debug("getGraphDescriptor - set ZE_GRAPH_FLAG_INPUT_GRAPH_PERSISTENT");
@@ -302,94 +398,21 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
302398
return GraphDescriptor{graphHandle, setPersistentFlag};
303399
}
304400

305-
/**
306-
* @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific
307-
* ones.
308-
*
309-
* @param arg The main Level Zero structure from which most metadata will be extracted.
310-
* @param metadata The secondary Level Zero structure from which metadata will be extracted. More specifically, the
311-
* argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for
312-
* the referenced attribute.
313-
* @returns A descriptor object containing the metadata converted into OpenVINO specific structures.
314-
*/
315-
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
316-
const std::optional<ze_graph_argument_metadata_t>& metadata) {
317-
auto logger = Logger::global().clone("getIODescriptor");
318-
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
319-
ov::Shape shapeFromCompiler;
320-
ov::PartialShape shapeFromIRModel;
321-
std::unordered_set<std::string> outputTensorNames;
322-
323-
for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) {
324-
outputTensorNames.insert(arg.associated_tensor_names[id]);
325-
}
326-
for (uint32_t id = 0; id < arg.dims_count; id++) {
327-
shapeFromCompiler.push_back(arg.dims[id]);
328-
}
329-
if (metadata.has_value()) {
330-
const auto dynamicDim = std::numeric_limits<uint64_t>::max();
331-
shapeFromIRModel.reserve(metadata->shape_size);
332-
for (uint32_t id = 0; id < metadata->shape_size; id++) {
333-
if (metadata->shape[id] != dynamicDim) {
334-
shapeFromIRModel.push_back(metadata->shape[id]);
335-
} else {
336-
// lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
337-
// upper bound is set to the value from shapeFromCompiler as it is filled with upper bounds
338-
// in case of dynamic dimensions
339-
if (id == utils::BATCH_AXIS && shapeFromCompiler[id] == utils::DEFAULT_BATCH_SIZE) {
340-
logger.info("Ignore dynamic batch size upper limit, but keep the dimension dynamic as a metadata "
341-
"from compiler has been lost.");
342-
// We need to keep the batch dimension dynamic
343-
shapeFromIRModel.push_back(ov::Dimension(1, dynamicDim));
344-
} else {
345-
shapeFromIRModel.push_back(ov::Dimension(1, shapeFromCompiler[id]));
346-
}
347-
}
348-
}
401+
bool ZeGraphExtWrappers::isBlobDataImported(const GraphDescriptor& graphDescriptor) const {
402+
if (_graphExtVersion < ZE_MAKE_VERSION(1, 14)) {
403+
return graphDescriptor._memoryPersistent;
349404
}
350405

351-
// Flags will be used instead of indices for informing the type of the current entry
352-
std::string nameFromCompiler = arg.name;
353-
const bool isInput = (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT);
354-
bool isStateInput = false;
355-
bool isStateOutput = false;
356-
bool isShapeTensor = false;
357-
bool isInitInputWeights = false;
358-
bool isInitOutputWeights = false;
359-
bool isMainInputWeights = false;
360-
if (isInput && isStateInputName(nameFromCompiler)) {
361-
nameFromCompiler = nameFromCompiler.substr(READVALUE_PREFIX.length());
362-
isStateInput = true;
363-
} else if (!isInput && isStateOutputName(nameFromCompiler)) {
364-
nameFromCompiler = nameFromCompiler.substr(ASSIGN_PREFIX.length());
365-
isStateOutput = true;
366-
} else if (isShapeTensorName(nameFromCompiler)) {
367-
nameFromCompiler = nameFromCompiler.substr(SHAPE_TENSOR_PREFIX.length());
368-
isShapeTensor = true;
369-
} else if (isInput && isInitInputWeightsName(nameFromCompiler)) {
370-
nameFromCompiler = nameFromCompiler.substr(INIT_INPUT_WEIGHTS_PREFIX.length());
371-
isInitInputWeights = true;
372-
} else if (!isInput && isInitOutputWeightsName(nameFromCompiler)) {
373-
nameFromCompiler = nameFromCompiler.substr(INIT_OUTPUT_WEIGHTS_PREFIX.length());
374-
isInitOutputWeights = true;
375-
} else if (isInput && isMainInputWeightsName(nameFromCompiler)) {
376-
nameFromCompiler = nameFromCompiler.substr(MAIN_INPUT_WEIGHTS_PREFIX.length());
377-
isMainInputWeights = true;
406+
ze_graph_properties_3_t graphProperties = {};
407+
graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
408+
auto result = _zeroInitStruct->getGraphDdiTable().pfnGetProperties3(graphDescriptor._handle, &graphProperties);
409+
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties3", result, _zeroInitStruct->getGraphDdiTable());
410+
411+
if (graphProperties.flags & ZE_GRAPH_PROPERTIES_FLAG_NO_STANDARD_ALLOCATION) {
412+
return false;
378413
}
379414

380-
return {std::move(nameFromCompiler),
381-
precision,
382-
shapeFromCompiler,
383-
isStateInput,
384-
isStateOutput,
385-
isShapeTensor,
386-
isInitInputWeights,
387-
isInitOutputWeights,
388-
isMainInputWeights,
389-
std::nullopt,
390-
arg.debug_friendly_name,
391-
std::move(outputTensorNames),
392-
metadata.has_value() ? std::optional(shapeFromIRModel) : std::nullopt};
415+
return true;
393416
}
394417

395418
void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,

src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_types.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ struct ze_graph_dditable_ext_decorator final {
188188
throwWhenUnsupported("pfnCompilerIsOptionSupported", ZE_GRAPH_EXT_VERSION_1_11);
189189
return _impl->pfnCompilerIsOptionSupported(hDevice, type, pOption, pValue);
190190
}
191+
192+
// version 1.12
193+
ze_result_t ZE_APICALL pfnCreate3(ze_context_handle_t hContext,
194+
ze_device_handle_t hDevice,
195+
const ze_graph_desc_2_t* desc,
196+
ze_graph_handle_t* phGraph,
197+
ze_graph_build_log_handle_t* phGraphBuildLog) {
198+
throwWhenUnsupported("pfnCreate3", ZE_GRAPH_EXT_VERSION_1_12);
199+
return _impl->pfnCreate3(hContext, hDevice, desc, phGraph, phGraphBuildLog);
200+
}
201+
202+
ze_result_t ZE_APICALL pfnGetProperties3(ze_graph_handle_t hGraph, ze_graph_properties_3_t* pGraphProperties) {
203+
throwWhenUnsupported("pfnGetProperties3", ZE_GRAPH_EXT_VERSION_1_12);
204+
return _impl->pfnGetProperties3(hGraph, pGraphProperties);
205+
}
206+
207+
ze_result_t ZE_APICALL pfnBuildLogGetString2(ze_graph_build_log_handle_t hGraphBuildLog,
208+
uint32_t* pSize,
209+
char* pBuildLog) {
210+
throwWhenUnsupported("pfnBuildLogGetString2", ZE_GRAPH_EXT_VERSION_1_12);
211+
return _impl->pfnBuildLogGetString2(hGraphBuildLog, pSize, pBuildLog);
212+
}
213+
214+
ze_result_t ZE_APICALL pfnBuildLogDestroy(ze_graph_build_log_handle_t hGraphBuildLog) {
215+
throwWhenUnsupported("pfnBuildLogDestroy", ZE_GRAPH_EXT_VERSION_1_12);
216+
return _impl->pfnBuildLogDestroy(hGraphBuildLog);
217+
}
191218
};
192219

193220
/**

0 commit comments

Comments
 (0)