2424
// Evaluates to true when the graph extension version T is lower than ZE_GRAPH_EXT_VERSION_1_7.
// The argument is parenthesized so that compound expressions (e.g. `cond ? a : b`) compare as a whole.
// NOTE(review): presumably selects the "copy the native binary" path for older drivers - confirm at call sites.
#define UseCopyForNativeBinary(T) ((T) < ZE_GRAPH_EXT_VERSION_1_7)
2626
27+ namespace {
28+ using namespace intel_npu ;
29+ /* *
30+ * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific
31+ * ones.
32+ *
33+ * @param arg The main Level Zero structure from which most metadata will be extracted.
34+ * @param metadata The secondary Level Zero structure from which metadata will be extracted. More specifically, the
35+ * argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for
36+ * the referenced attribute.
37+ * @returns A descriptor object containing the metadata converted in OpenVINO specific structures.
38+ */
39+ static IODescriptor getIODescriptor (const ze_graph_argument_properties_3_t & arg,
40+ const std::optional<ze_graph_argument_metadata_t >& metadata) {
41+ auto logger = Logger::global ().clone (" getIODescriptor" );
42+ ov::element::Type_t precision = zeroUtils::toOVElementType (arg.devicePrecision );
43+ ov::Shape shapeFromCompiler;
44+ ov::PartialShape shapeFromIRModel;
45+ std::unordered_set<std::string> outputTensorNames;
46+
47+ for (uint32_t id = 0 ; id < arg.associated_tensor_names_count ; id++) {
48+ outputTensorNames.insert (arg.associated_tensor_names [id]);
49+ }
50+ for (uint32_t id = 0 ; id < arg.dims_count ; id++) {
51+ shapeFromCompiler.push_back (arg.dims [id]);
52+ }
53+ if (metadata.has_value ()) {
54+ const auto dynamicDim = std::numeric_limits<uint64_t >::max ();
55+ shapeFromIRModel.reserve (metadata->shape_size );
56+ for (uint32_t id = 0 ; id < metadata->shape_size ; id++) {
57+ if (metadata->shape [id] != dynamicDim) {
58+ shapeFromIRModel.push_back (metadata->shape [id]);
59+ } else {
60+ // lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
61+ // upper bound is set to the value from shapeFromCompiler as it is filled with upper bounds
62+ // in case of dynamic dimensions
63+ if (id == utils::BATCH_AXIS && shapeFromCompiler[id] == utils::DEFAULT_BATCH_SIZE) {
64+ logger.info (" Ignore dynamic batch size upper limit, but keep the dimension dynamic as a metadata "
65+ " from compiler has been lost." );
66+ // We need to kepp batch dimension dynamic
67+ shapeFromIRModel.push_back (ov::Dimension (1 , dynamicDim));
68+ } else {
69+ shapeFromIRModel.push_back (ov::Dimension (1 , shapeFromCompiler[id]));
70+ }
71+ }
72+ }
73+ }
74+
75+ // Flags will be used instead of indices for informing the type of the current entry
76+ std::string nameFromCompiler = arg.name ;
77+ const bool isInput = (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT);
78+ bool isStateInput = false ;
79+ bool isStateOutput = false ;
80+ bool isShapeTensor = false ;
81+ bool isInitInputWeights = false ;
82+ bool isInitOutputWeights = false ;
83+ bool isMainInputWeights = false ;
84+ if (isInput && isStateInputName (nameFromCompiler)) {
85+ nameFromCompiler = nameFromCompiler.substr (READVALUE_PREFIX.length ());
86+ isStateInput = true ;
87+ } else if (!isInput && isStateOutputName (nameFromCompiler)) {
88+ nameFromCompiler = nameFromCompiler.substr (ASSIGN_PREFIX.length ());
89+ isStateOutput = true ;
90+ } else if (isShapeTensorName (nameFromCompiler)) {
91+ nameFromCompiler = nameFromCompiler.substr (SHAPE_TENSOR_PREFIX.length ());
92+ isShapeTensor = true ;
93+ } else if (isInput && isInitInputWeightsName (nameFromCompiler)) {
94+ nameFromCompiler = nameFromCompiler.substr (INIT_INPUT_WEIGHTS_PREFIX.length ());
95+ isInitInputWeights = true ;
96+ } else if (!isInput && isInitOutputWeightsName (nameFromCompiler)) {
97+ nameFromCompiler = nameFromCompiler.substr (INIT_OUTPUT_WEIGHTS_PREFIX.length ());
98+ isInitOutputWeights = true ;
99+ } else if (isInput && isMainInputWeightsName (nameFromCompiler)) {
100+ nameFromCompiler = nameFromCompiler.substr (MAIN_INPUT_WEIGHTS_PREFIX.length ());
101+ isMainInputWeights = true ;
102+ }
103+
104+ return {std::move (nameFromCompiler),
105+ precision,
106+ shapeFromCompiler,
107+ isStateInput,
108+ isStateOutput,
109+ isShapeTensor,
110+ isInitInputWeights,
111+ isInitOutputWeights,
112+ isMainInputWeights,
113+ std::nullopt ,
114+ arg.debug_friendly_name ,
115+ std::move (outputTensorNames),
116+ metadata.has_value () ? std::optional (shapeFromIRModel) : std::nullopt };
117+ }
118+ } // namespace
119+
27120namespace intel_npu {
28121
29122GraphDescriptor::GraphDescriptor (ze_graph_handle_t handle, bool memoryPersistent)
@@ -225,7 +318,7 @@ std::unordered_set<std::string> ZeGraphExtWrappers::queryGraph(SerializedIR seri
225318 return parseQueryResult (supportedLayers);
226319}
227320
228- bool ZeGraphExtWrappers::canCpuVaBeImported (void * data, size_t size, const uint32_t flags ) const {
321+ bool ZeGraphExtWrappers::canCpuVaBeImported (void * data, size_t size) const {
229322 if (_graphExtVersion < ZE_MAKE_VERSION (1 , 13 ) ||
230323 !utils::memory_and_size_aligned_to_standard_page_size (data, size)) {
231324 return false ;
@@ -275,8 +368,11 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
275368 }
276369
277370 uint32_t flags = 0 ;
278-
279- bool setPersistentFlag = canCpuVaBeImported (blobData, blobSize, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
371+ bool setPersistentFlag = true ;
372+ if (_graphExtVersion < ZE_MAKE_VERSION (1 , 14 )) {
373+ // special case for ext version 1.13
374+ setPersistentFlag = canCpuVaBeImported (blobData, blobSize);
375+ }
280376
281377 if (setPersistentFlag) {
282378 _logger.debug (" getGraphDescriptor - set ZE_GRAPH_FLAG_INPUT_GRAPH_PERSISTENT" );
@@ -302,94 +398,21 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
302398 return GraphDescriptor{graphHandle, setPersistentFlag};
303399}
304400
305- /* *
306- * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific
307- * ones.
308- *
309- * @param arg The main Level Zero structure from which most metadata will be extracted.
310- * @param metadata The secondary Level Zero structure from which metadata will be extracted. More specifically, the
311- * argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for
312- * the referenced attribute.
313- * @returns A descriptor object containing the metadata converted in OpenVINO specific structures.
314- */
315- static IODescriptor getIODescriptor (const ze_graph_argument_properties_3_t & arg,
316- const std::optional<ze_graph_argument_metadata_t >& metadata) {
317- auto logger = Logger::global ().clone (" getIODescriptor" );
318- ov::element::Type_t precision = zeroUtils::toOVElementType (arg.devicePrecision );
319- ov::Shape shapeFromCompiler;
320- ov::PartialShape shapeFromIRModel;
321- std::unordered_set<std::string> outputTensorNames;
322-
323- for (uint32_t id = 0 ; id < arg.associated_tensor_names_count ; id++) {
324- outputTensorNames.insert (arg.associated_tensor_names [id]);
325- }
326- for (uint32_t id = 0 ; id < arg.dims_count ; id++) {
327- shapeFromCompiler.push_back (arg.dims [id]);
328- }
329- if (metadata.has_value ()) {
330- const auto dynamicDim = std::numeric_limits<uint64_t >::max ();
331- shapeFromIRModel.reserve (metadata->shape_size );
332- for (uint32_t id = 0 ; id < metadata->shape_size ; id++) {
333- if (metadata->shape [id] != dynamicDim) {
334- shapeFromIRModel.push_back (metadata->shape [id]);
335- } else {
336- // lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
337- // upper bound is set to the value from shapeFromCompiler as it is filled with upper bounds
338- // in case of dynamic dimensions
339- if (id == utils::BATCH_AXIS && shapeFromCompiler[id] == utils::DEFAULT_BATCH_SIZE) {
340- logger.info (" Ignore dynamic batch size upper limit, but keep the dimension dynamic as a metadata "
341- " from compiler has been lost." );
342- // We need to kepp batch dimension dynamic
343- shapeFromIRModel.push_back (ov::Dimension (1 , dynamicDim));
344- } else {
345- shapeFromIRModel.push_back (ov::Dimension (1 , shapeFromCompiler[id]));
346- }
347- }
348- }
401+ bool ZeGraphExtWrappers::isBlobDataImported (const GraphDescriptor& graphDescriptor) const {
402+ if (_graphExtVersion < ZE_MAKE_VERSION (1 , 14 )) {
403+ return graphDescriptor._memoryPersistent ;
349404 }
350405
351- // Flags will be used instead of indices for informing the type of the current entry
352- std::string nameFromCompiler = arg.name ;
353- const bool isInput = (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT);
354- bool isStateInput = false ;
355- bool isStateOutput = false ;
356- bool isShapeTensor = false ;
357- bool isInitInputWeights = false ;
358- bool isInitOutputWeights = false ;
359- bool isMainInputWeights = false ;
360- if (isInput && isStateInputName (nameFromCompiler)) {
361- nameFromCompiler = nameFromCompiler.substr (READVALUE_PREFIX.length ());
362- isStateInput = true ;
363- } else if (!isInput && isStateOutputName (nameFromCompiler)) {
364- nameFromCompiler = nameFromCompiler.substr (ASSIGN_PREFIX.length ());
365- isStateOutput = true ;
366- } else if (isShapeTensorName (nameFromCompiler)) {
367- nameFromCompiler = nameFromCompiler.substr (SHAPE_TENSOR_PREFIX.length ());
368- isShapeTensor = true ;
369- } else if (isInput && isInitInputWeightsName (nameFromCompiler)) {
370- nameFromCompiler = nameFromCompiler.substr (INIT_INPUT_WEIGHTS_PREFIX.length ());
371- isInitInputWeights = true ;
372- } else if (!isInput && isInitOutputWeightsName (nameFromCompiler)) {
373- nameFromCompiler = nameFromCompiler.substr (INIT_OUTPUT_WEIGHTS_PREFIX.length ());
374- isInitOutputWeights = true ;
375- } else if (isInput && isMainInputWeightsName (nameFromCompiler)) {
376- nameFromCompiler = nameFromCompiler.substr (MAIN_INPUT_WEIGHTS_PREFIX.length ());
377- isMainInputWeights = true ;
406+ ze_graph_properties_3_t graphProperties = {};
407+ graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
408+ auto result = _zeroInitStruct->getGraphDdiTable ().pfnGetProperties3 (graphDescriptor._handle , &graphProperties);
409+ THROW_ON_FAIL_FOR_LEVELZERO_EXT (" pfnGetProperties3" , result, _zeroInitStruct->getGraphDdiTable ());
410+
411+ if (graphProperties.flags & ZE_GRAPH_PROPERTIES_FLAG_NO_STANDARD_ALLOCATION) {
412+ return false ;
378413 }
379414
380- return {std::move (nameFromCompiler),
381- precision,
382- shapeFromCompiler,
383- isStateInput,
384- isStateOutput,
385- isShapeTensor,
386- isInitInputWeights,
387- isInitOutputWeights,
388- isMainInputWeights,
389- std::nullopt ,
390- arg.debug_friendly_name ,
391- std::move (outputTensorNames),
392- metadata.has_value () ? std::optional (shapeFromIRModel) : std::nullopt };
415+ return true ;
393416}
394417
395418void ZeGraphExtWrappers::getMetadata (ze_graph_handle_t graphHandle,
0 commit comments