From fd9e4b5500a1b4f281dd93890a14140fc1d1218b Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 17 Dec 2024 13:25:10 +0400
Subject: [PATCH 01/18] Base impl

Signed-off-by: Vladimir Paramuzov
---
 .../tests/unit/module_tests/config_common.cpp |  77 +++++++++
 .../tests/unit/module_tests/config_common.hpp | 127 ++++++++++++++
 .../tests/unit/module_tests/config_gpu.cpp    | 160 ++++++++++++++++++
 .../tests/unit/module_tests/config_gpu.hpp    |  48 ++++++
 .../module_tests/config_gpu_debug_options.inl |  31 ++++
 .../config_gpu_debug_properties.hpp           |  42 +++++
 .../unit/module_tests/config_gpu_options.inl  |  50 ++++++
 .../tests/unit/module_tests/device_test.cpp   |  41 +++++
 8 files changed, 576 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl

diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
new file mode 100644
index 00000000000000..06625d33c40307
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "config_common.hpp"
+#include "openvino/core/except.hpp"
+
+
+namespace ov {
+
+void PluginConfig::set_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+
+        it->second->set_any(val);
+    }
+}
+
+ov::Any PluginConfig::get_property(const std::string& name) const {
+    const auto& known_options = m_options_map;
+    auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+    OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+
+    return it->second->get_any();
+}
+
+void PluginConfig::set_user_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+        OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
+
+        user_properties[name] = val;
+    }
+}
+
+void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    // Copy user properties into the internal options before applying hints to ensure that
+    // a property set by a hint won't be overridden by a value from the user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT:
+    // if we applied hints first and then copied all values from the user config into the internal one,
+    // we would get num_streams=AUTO in the final config while a concrete integer number is expected.
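+    //
+    // Illustration (hypothetical values): with
+    //     cfg.set_user_property(ov::num_streams(ov::streams::AUTO),
+    //                           ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
+    // the loop below first writes AUTO into the options, and finalize_impl() then resolves it
+    // to a concrete stream count for the target device.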
+    for (const auto& prop : user_properties) {
+        auto& option = m_options_map.at(prop.first);
+        option->set_any(prop.second);
+    }
+
+    finalize_impl(context, rt_info);
+}
+
+std::string PluginConfig::to_string() const {
+    std::stringstream s;
+
+    s << "-----------------------------------------\n";
+    s << "PROPERTIES:\n";
+
+    for (const auto& option : m_options_map) {
+        s << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
+    }
+    s << "USER PROPERTIES:\n";
+    for (const auto& user_prop : user_properties) {
+        s << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+    }
+
+    return s.str();
+}
+
+} // namespace ov
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp
new file mode 100644
index 00000000000000..3a6a2bc0d8de51
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <functional>
+#include <unordered_map>
+#include "openvino/runtime/iremote_context.hpp"
+#include "openvino/runtime/properties.hpp"
+#include "openvino/core/except.hpp"
+
+#ifndef COUNT_N
+    #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N
+#endif
+
+#ifndef COUNT
+    #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1))
+#endif
+
+#ifndef CAT
+    #define CAT(a, b) a ## b
+#endif
+
+#ifndef EXPAND
+    #define EXPAND(N) N
+#endif
+
+#define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__)
+#define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0
+#define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1
+#define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2
+
+#define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__))
+
+namespace ov {
+
+
+struct ConfigOptionBase {
+    explicit ConfigOptionBase() {}
+    virtual ~ConfigOptionBase() = default;
+
+    virtual void set_any(const ov::Any any) = 0;
+    virtual ov::Any get_any() const = 0;
+    virtual bool is_valid_value(ov::Any val) = 0;
+};
+
+template <typename T>
+struct ConfigOption : public ConfigOptionBase {
+    ConfigOption(const T& default_val, std::function<bool(T)> validator = nullptr)
+        : ConfigOptionBase(), value(default_val), validator(validator) {}
+    T value;
+    std::function<bool(T)> validator;
+
+    void set_any(const ov::Any any) override {
+        if (validator)
+            OPENVINO_ASSERT(validator(any.as<T>()), "Invalid value: ", any.as<T>());
+        value = any.as<T>();
+    }
+
+    ov::Any get_any() const override {
+        return ov::Any(value);
+    }
+
+    bool is_valid_value(ov::Any val) override {
+        try {
+            return validator ? validator(val.as<T>()) : true;
+        } catch (std::exception&) {
+            return false;
+        }
+
+    }
+};
+
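+// Usage sketch for ConfigOption (illustration only; the option below is hypothetical
+// and not part of this change):
+//
+//     ConfigOption<int> streams(1, [](int v) { return v >= 1; });
+//     streams.set_any(ov::Any(4));                     // passes the validator, value becomes 4
+//     bool ok = streams.is_valid_value(ov::Any(0));    // false: the validator rejects 0
+//     // streams.set_any(ov::Any(0)) would throw via OPENVINO_ASSERT
+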
+class PluginConfig {
+public:
+    PluginConfig() {}
+    PluginConfig(std::initializer_list<ov::AnyMap::value_type> values) : PluginConfig() { set_property(ov::AnyMap(values)); }
+    explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); }
+    explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); }
+
+    void set_property(const ov::AnyMap& properties);
+    Any get_property(const std::string& name) const;
+    void set_user_property(const ov::AnyMap& properties);
+
+    template <typename... Properties>
+    util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
+        set_property(ov::AnyMap{std::forward<Properties>(properties)...});
+    }
+
+    template <typename... Properties>
+    util::EnableIfAllStringAny<void, Properties...> set_user_property(Properties&&... properties) {
+        set_user_property(ov::AnyMap{std::forward<Properties>(properties)...});
+    }
+
+    template <typename T, PropertyMutability mutability>
+    T get_property(const ov::Property<T, mutability>& property) const {
+        OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name());
+        return static_cast<ConfigOption<T>*>(m_options_map.at(property.name()))->value;
+    }
+
+    std::string to_string() const;
+
+    void finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info);
+    virtual void finalize_impl(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) = 0;
+
+protected:
+    template <typename T, PropertyMutability mutability>
+    bool is_set_by_user(const ov::Property<T, mutability>& property) const {
+        return user_properties.find(property.name()) != user_properties.end();
+    }
+
+    template <typename T, PropertyMutability mutability>
+    void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
+        if (!is_set_by_user(property)) {
+            auto rt_info_val = rt_info.find(property.name());
+            if (rt_info_val != rt_info.end()) {
+                set_user_property(property(rt_info_val->second.template as<T>()));
+            }
+        }
+    }
+    std::unordered_map<std::string, ConfigOptionBase*> m_options_map;
+    ov::AnyMap user_properties;
+    using OptionMapEntry = decltype(m_options_map)::value_type;
+};
+
+} // namespace ov
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp
new file mode 100644
index 00000000000000..b3c81da368ae68
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp
@@ -0,0 +1,160 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "config_gpu.hpp"
+#include "intel_gpu/plugin/remote_context.hpp"
+#include "openvino/runtime/internal_properties.hpp"
+#include "intel_gpu/runtime/internal_properties.hpp"
+#include "config_gpu_debug_properties.hpp"
+
+
+namespace ov {
+namespace intel_gpu {
+
+NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() {
+    #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \
+        m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar;
+
+    OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin")
+    #include "config_gpu_options.inl"
+    #include "config_gpu_debug_options.inl"
+
+    #undef OV_CONFIG_OPTION
+}
+
+void NewExecutionConfig::finalize_impl(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    const auto& device_info = std::dynamic_pointer_cast<RemoteContextImpl>(context)->get_engine().get_device_info();
+    apply_user_properties(device_info);
+    apply_rt_info(device_info, rt_info);
+}
+
+void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) {
+    if (is_set_by_user(ov::hint::execution_mode)) {
+        const auto mode = get_property(ov::hint::execution_mode);
+        if (!is_set_by_user(ov::hint::inference_precision)) {
+            if (mode == ov::hint::ExecutionMode::ACCURACY) {
+                set_property(ov::hint::inference_precision(ov::element::undefined));
+            } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) {
+                if (info.supports_fp16)
+                    set_property(ov::hint::inference_precision(ov::element::f16));
+                else
+                    set_property(ov::hint::inference_precision(ov::element::f32));
+            }
+        }
+    }
+}
+
+void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
+    if (is_set_by_user(ov::hint::performance_mode)) {
+        const auto mode = get_property(ov::hint::performance_mode);
+        if (!is_set_by_user(ov::num_streams)) {
+            if (mode == ov::hint::PerformanceMode::LATENCY) {
+                set_property(ov::num_streams(1));
+            } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) {
+                set_property(ov::num_streams(ov::streams::AUTO));
+            }
+        }
+    }
+
+    if (get_property(ov::num_streams) == ov::streams::AUTO) {
+        int32_t n_streams = std::max(info.num_ccs, 2);
+        set_property(ov::num_streams(n_streams));
+    }
+
+    if (get_property(ov::internal::exclusive_async_requests)) {
+        set_property(ov::num_streams(1));
+    }
+
+    // Allow kernels reuse only for single-stream scenarios
+    if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) {
+        if (get_property(ov::num_streams) != 1) {
+            set_property(ov::intel_gpu::hint::enable_kernels_reuse(false));
+        }
+    }
+}
+
+void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
+    if (is_set_by_user(ov::hint::model_priority)) {
+        const auto priority = get_property(ov::hint::model_priority);
+        if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) {
+            set_property(ov::intel_gpu::hint::queue_priority(priority));
+        }
+    }
+}
+
+void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
+    // GPU_DEBUG_GET_INSTANCE(debug_config);
+    // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
+    //     set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs));
+    // }
+
+    // GPU_DEBUG_IF(debug_config->serialize_compile == 1) {
+    //     set_property(ov::compilation_num_threads(1));
+    // }
+
+    // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) {
+    //     GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n";
+    //     set_property(ov::enable_profiling(true));
+    // }
+
+    // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) {
+    //     set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));
+    // }
+
+    // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) {
+    //     if (debug_config->dynamic_quantize_group_size == -1)
+    //         set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX));
+    //     else
+    //         set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size));
+    // }
+
+    // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) {
+    //     GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) {
+    //         set_property(ov::hint::kv_cache_precision(ov::element::i8));
+    //     } else {
+    //         set_property(ov::hint::kv_cache_precision(ov::element::undefined));
+    //     }
+    // }
+}
+
+void NewExecutionConfig::apply_hints(const cldnn::device_info& info) {
+    apply_execution_hints(info);
+    apply_performance_hints(info);
+    apply_priority_hints(info);
+    apply_debug_options(info);
+}
+
+void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
+    apply_hints(info);
+    if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) {
+        set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad));
+    }
+    if (info.supports_immad) {
+        set_property(ov::intel_gpu::use_onednn(true));
+    }
+    if (get_property(ov::intel_gpu::use_onednn)) {
+        set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
+    }
+
+    // Enable KV-cache compression by default for non-systolic platforms
+    if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) {
+        set_property(ov::hint::kv_cache_precision(ov::element::i8));
+    }
+
+    // Enable dynamic quantization by default for non-systolic platforms
+    if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) {
+        set_property(ov::hint::dynamic_quantization_group_size(32));
+    }
+}
+
+void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) {
+    if (!info.supports_immad) {
+        apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
+        apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
+    }
+    apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
+}
+
+} // namespace intel_gpu
+} // namespace ov
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp
new file mode 100644
index 00000000000000..69b9c321863c03
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "config_common.hpp"
+#include "intel_gpu/runtime/device_info.hpp"
+#include "intel_gpu/runtime/utils.hpp"
+#include "intel_gpu/runtime/internal_properties.hpp"
+#include "openvino/runtime/internal_properties.hpp"
+#include "config_gpu_debug_properties.hpp"
+#include <thread>
+
+namespace ov {
+namespace intel_gpu {
+
+struct NewExecutionConfig : public ov::PluginConfig {
+    NewExecutionConfig();
+
+    #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \
+        ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type> PropertyVar = \
+            ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type>(GET_EXCEPT_LAST(__VA_ARGS__));
+
+
+    #include "config_gpu_options.inl"
+    #include "config_gpu_debug_options.inl"
+
+    #undef OV_CONFIG_OPTION
+
+    void finalize_impl(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) override;
+
+protected:
+    // Note that an RT info property value has lower priority than values set by the user via core.set_property or passed to a compile_model call,
+    // so this method should be called after setting all user properties, but before the apply_user_properties() call.
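+    // Illustration (hypothetical rt_info contents): a model may carry runtime options such as
+    //     { ov::hint::dynamic_quantization_group_size.name(), 32 }
+    // and apply_rt_info() forwards each supported entry through apply_rt_info_property(),
+    // which applies it only when the property was not already set by the user.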
+    void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info);
+
+    void apply_user_properties(const cldnn::device_info& info);
+    void apply_hints(const cldnn::device_info& info);
+    void apply_execution_hints(const cldnn::device_info& info);
+    void apply_performance_hints(const cldnn::device_info& info);
+    void apply_priority_hints(const cldnn::device_info& info);
+    void apply_debug_options(const cldnn::device_info& info);
+};
+
+
+} // namespace intel_gpu
+} // namespace ov
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl
new file mode 100644
index 00000000000000..687475f67f287c
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl
@@ -0,0 +1,31 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifdef GPU_DEBUG_CONFIG
+OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable")
+OV_CONFIG_OPTION(ov::intel_gpu, help, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "")
+OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "")
+
+#endif
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp
new file mode 100644
index 00000000000000..f3ff878ce07740
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/runtime/properties.hpp"
+
+#ifdef GPU_DEBUG_CONFIG
+
+namespace ov {
+namespace intel_gpu {
+
+static constexpr Property<bool, PropertyMutability::RW> verbose{"VERBOSE"};
+static constexpr Property<bool, PropertyMutability::RW> help{"HELP"};
+static constexpr Property<bool, PropertyMutability::RW> disable_usm{"DISABLE_USM"};
+static constexpr Property<bool, PropertyMutability::RW> disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"};
+static constexpr Property<std::string, PropertyMutability::RW> dump_profiling_data{"DUMP_PROFILING_DATA"};
+// static constexpr Property<std::string, PropertyMutability::RW> dump_graphs{"DUMP_GRAPHS"};
+static constexpr Property<std::string, PropertyMutability::RW> dump_sources{"DUMP_SOURCES"};
+static constexpr Property<std::string, PropertyMutability::RW> dump_tensors{"DUMP_TENSORS"};
+static constexpr Property<std::string, PropertyMutability::RW> dump_memory_pool{"DUMP_MEMORY_POOL"};
+static constexpr Property<std::string, PropertyMutability::RW> dump_iterations{"DUMP_ITERATIONS"};
+static constexpr Property<bool, PropertyMutability::RW> host_time_profiling{"HOST_TIME_PROFILING"};
+// static constexpr Property<size_t, PropertyMutability::RW> max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"};
+static constexpr Property<int, PropertyMutability::RW> impls_cache_capacity{"IMPLS_CACHE_CAPACITY"};
+static constexpr Property<bool, PropertyMutability::RW> disable_async_compilation{"DISABLE_ASYNC_COMPILATION"};
+static constexpr Property<bool, PropertyMutability::RW> disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"};
+static constexpr Property<bool, PropertyMutability::RW> disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"};
+static constexpr Property<bool, PropertyMutability::RW> disable_memory_reuse{"DISABLE_MEMORY_REUSE"};
+static constexpr Property<bool, PropertyMutability::RW> disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"};
+static constexpr Property<bool, PropertyMutability::RW> disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"};
+static constexpr Property<bool, PropertyMutability::RW> use_usm_host{"USE_USM_HOST"};
+static constexpr Property<bool, PropertyMutability::RW> enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"};
+static constexpr Property<bool, PropertyMutability::RW> asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"};
+static constexpr Property<std::string, PropertyMutability::RW> mem_prealloc_options{"MEM_PREALLOC_OPTIONS"};
+static constexpr Property<std::string, PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"};
+
+} // namespace intel_gpu
+} // namespace ov
+
+#endif // GPU_DEBUG_CONFIG
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl
new file mode 100644
index 00000000000000..b3aa12dc75c49b
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl
@@ -0,0 +1,50 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Namespace, property name, default value, [validator], description
+OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin")
+OV_CONFIG_OPTION(ov::device, id, "0", "ID of the current device")
+OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
+OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
+OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
+OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16,
+    [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision")
+OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings")
+OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact the number of streams, auto batching, etc")
+OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
+OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application")
+OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
+
+OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks")
+OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
+OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
+OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution")
+OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling")
+OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available")
+OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "")
+OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "")
+OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache")
+OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
+OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "")
+OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "")
+OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
+OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
+OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")
+
+OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "")
+OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "")
+
+OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "")
+OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "")
+OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "")
+OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector<std::string>{}, "")
+OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "")
+OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "")
+OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "")
+OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "")
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp
index b27275d0f03d99..b296242905b958 100644
--- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp
@@ -2,6 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "intel_gpu/runtime/execution_config.hpp"
+#include "module_tests/config_gpu.hpp"
+#include 
"openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -101,3 +104,41 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } + +// class Test { +// public: +// int i; +// constexpr Test(int i) : i(i) {} +// }; + +// constexpr const Test test1(1); +// constexpr const Test test2(2); + +// template +// int get_prop() { +// static_assert(false, "FAIL"); +// } + +// template class prop, typename T, ov::PropertyMutability mutability> +// T get_prop() { +// static_assert(false, "FAIL"); +// } + + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} From 4df5050acf5e5eec77e5f0c9f458506b9b1a51b2 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 14:19:13 +0400 Subject: [PATCH 02/18] make it common Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp} | 30 +++++++++--- .../src/dev/plugin_config.cpp} | 5 +- .../intel_gpu/runtime/internal_properties.hpp | 24 ++++++++- .../intel_gpu/runtime/options_debug.inl} | 4 +- .../intel_gpu/runtime/options_release.inl} | 0 .../intel_gpu/runtime/plugin_config.hpp} | 13 ++--- .../runtime/plugin_config.cpp} | 49 ++++--------------- .../config_gpu_debug_properties.hpp | 42 ---------------- .../tests/unit/module_tests/config_test.cpp | 28 +++++++++++ .../tests/unit/module_tests/device_test.cpp | 41 ---------------- 10 files changed, 96 insertions(+), 140 deletions(-) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.hpp => inference/dev_api/openvino/runtime/plugin_config.hpp} (74%) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.cpp => inference/src/dev/plugin_config.cpp} (93%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_debug_options.inl => include/intel_gpu/runtime/options_debug.inl} (91%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_options.inl => include/intel_gpu/runtime/options_release.inl} (100%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.hpp => include/intel_gpu/runtime/plugin_config.hpp} (84%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.cpp => src/runtime/plugin_config.cpp} (71%) delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp similarity index 74% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp rename to src/inference/dev_api/openvino/runtime/plugin_config.hpp index 3a6a2bc0d8de51..acccd0bf343604 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include 
"openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" @@ -35,7 +35,6 @@ namespace ov { - struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -50,7 +49,6 @@ struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; - std::function validator; void set_any(const ov::Any any) override { if (validator) @@ -68,11 +66,29 @@ struct ConfigOption : public ConfigOptionBase { } catch (std::exception&) { return false; } - } + +private: + std::function validator; }; -class PluginConfig { +// Base class for configuration of plugins +// Implementation should provide a list of properties with default values and validators (optional) +// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class +// and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) +// get/set property members are provided to handle external property access +// The class provides a helpers to read the properties from configuration file and from environment variables +// +// Expected order of properties resolution: +// 1. Assign default value for each property per device +// 2. Save user properties passed via Core::set_property() call to user_properties +// 3. Save user properties passed via Core::compile_model() call to user_properties +// 4. Apply RT info properties to user_properties if they were not set by user +// 5. Read and apply properties from the config file as user_properties +// 6. Read and apply properties from the the environment variables as user_properties +// 7. Apply user_properties to actual plugin properties +// 8. 
Update dependant properties if they were not set by user either way +class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } @@ -119,7 +135,9 @@ class PluginConfig { } } } - std::unordered_map m_options_map; + std::map m_options_map; + + // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/inference/src/dev/plugin_config.cpp similarity index 93% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp rename to src/inference/src/dev/plugin_config.cpp index 06625d33c40307..9f169c07663a40 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/core/except.hpp" @@ -55,6 +55,9 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } finalize_impl(context, rt_info); + + // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization + user_properties.clear(); } std::string PluginConfig::to_string() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 199261772dcf2e..56a7bf25acc998 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -47,7 +47,6 @@ static constexpr Property allow_static_input_reord static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; @@ -58,6 +57,29 @@ static constexpr Property buffers_preallocation_r static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; +static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; +static constexpr Property dump_sources{"DUMP_SOURCES"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; +static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property 
disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl similarity index 91% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl index 687475f67f287c..62548a7abb17fd 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl @@ -3,7 +3,8 @@ // #ifdef GPU_DEBUG_CONFIG -OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable") + +OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") @@ -23,7 +24,6 @@ OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "") OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl similarity index 100% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp similarity index 84% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp rename to src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 69b9c321863c03..51e72da8be5923 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -4,12 +4,10 @@ #pragma once -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/utils.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" #include namespace ov { @@ -22,15 +20,14 @@ struct NewExecutionConfig : public ov::PluginConfig { ConfigOption PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "config_gpu_options.inl" - #include 
"config_gpu_debug_options.inl" + #include "options_release.inl" + #include "options_debug.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; -protected: +private: // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call // So this method should be called after setting all user properties, but before apply_user_properties() call. void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); @@ -40,7 +37,7 @@ struct NewExecutionConfig : public ov::PluginConfig { void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); + void read_debug_options(const cldnn::device_info& info); }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp similarity index 71% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp rename to src/plugins/intel_gpu/src/runtime/plugin_config.cpp index b3c81da368ae68..0a3c49e6387104 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -2,11 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_gpu.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" namespace ov { @@ -16,15 +16,15 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; - OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") - #include "config_gpu_options.inl" - #include "config_gpu_debug_options.inl" + #include "intel_gpu/runtime/options_release.inl" + #include "intel_gpu/runtime/options_debug.inl" #undef OV_CONFIG_OPTION } void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + read_debug_options(device_info); apply_user_properties(device_info); apply_rt_info(device_info, rt_info); } @@ -83,46 +83,17 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - // GPU_DEBUG_GET_INSTANCE(debug_config); - // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - // set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - // } - - // GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - // set_property(ov::compilation_num_threads(1)); - // } - - // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - // GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - // set_property(ov::enable_profiling(true)); - // } - - // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - // set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - // } - - // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - // if (debug_config->dynamic_quantize_group_size == -1) - // set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - // else - // set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - // } - - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - // set_property(ov::hint::kv_cache_precision(ov::element::i8)); - // } else { - // set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - // } - // } +void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { + ov::AnyMap config_properties; + set_user_property(config_properties); + ov::AnyMap env_properties; + set_user_property(env_properties); } void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_execution_hints(info); apply_performance_hints(info); apply_priority_hints(info); - apply_debug_options(info); } void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp deleted file mode 100644 index f3ff878ce07740..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/properties.hpp" - -#ifdef GPU_DEBUG_CONFIG - -namespace ov { -namespace intel_gpu { - -static constexpr Property verbose{"VERBOSE"}; -static constexpr Property help{"HELP"}; -static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; -static constexpr Property 
dump_profiling_data{"DUMP_PROFILING_DATA"}; -// static constexpr Property dump_graphs{"DUMP_GRAPHS"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; -static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; -// static constexpr Property max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"}; -static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; -static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; -static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; -static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; -static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; -static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; -static constexpr Property use_usm_host{"USE_USM_HOST"}; -static constexpr Property enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"}; -static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; -static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; - -} // namespace intel_gpu -} // namespace ov - -#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp new file mode 100644 index 00000000000000..a1bb0ac8b6e6a0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2022-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" +#include "test_utils.h" + +using namespace cldnn; +using namespace ::tests; + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index b296242905b958..b27275d0f03d99 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -2,9 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/runtime/execution_config.hpp" -#include "module_tests/config_gpu.hpp" -#include "openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -104,41 +101,3 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } - -// class Test { -// public: -// int i; -// constexpr Test(int i) : i(i) {} -// }; - -// constexpr const Test test1(1); 
-// constexpr const Test test2(2); - -// template -// int get_prop() { -// static_assert(false, "FAIL"); -// } - -// template class prop, typename T, ov::PropertyMutability mutability> -// T get_prop() { -// static_assert(false, "FAIL"); -// } - - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 37b96988f3749e37c37e59ca24353d1301fa66c2 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 17:14:52 +0400 Subject: [PATCH 03/18] env and config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 11 +-- .../intel_gpu/runtime/plugin_config.hpp | 9 +- .../src/runtime/execution_config.cpp | 32 +++---- .../intel_gpu/src/runtime/plugin_config.cpp | 86 ++++++++----------- .../tests/unit/module_tests/config_test.cpp | 4 + 5 files changed, 65 insertions(+), 77 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 5e059b17da0e97..2654affe2626f5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -57,12 +57,12 @@ class PropertyTypeValidator : public BaseValidator { } }; -class ExecutionConfig { +class OldExecutionConfig { public: - ExecutionConfig(); - ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } - explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + OldExecutionConfig(); + OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } + explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } void set_default(); void set_property(const ov::AnyMap& properties); @@ -169,6 +169,7 @@ class ExecutionConfig { std::map property_validators; }; +using ExecutionConfig = OldExecutionConfig; } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 51e72da8be5923..6ea8f4e107bfc8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -25,21 +25,16 @@ struct NewExecutionConfig : public ov::PluginConfig { #undef OV_CONFIG_OPTION - void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - // Note that RT info property value has lower priority than values set 
by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); - void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void read_debug_options(const cldnn::device_info& info); }; - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index a698ec7eb6c5a0..eadc070dcca03c 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -11,7 +11,7 @@ namespace ov { namespace intel_gpu { -ExecutionConfig::ExecutionConfig() { +OldExecutionConfig::OldExecutionConfig() { set_default(); } @@ -33,7 +33,7 @@ class PerformanceModeValidator : public BaseValidator { } }; -void ExecutionConfig::set_default() { +void OldExecutionConfig::set_default() { register_property( std::make_tuple(ov::device::id, "0"), std::make_tuple(ov::enable_profiling, false), @@ -85,13 +85,13 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { +void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { property_validators[property.first] = validator; supported_properties[property.first] = visibility; internal_properties[property.first] = property.second; } -void ExecutionConfig::set_property(const AnyMap& config) { +void OldExecutionConfig::set_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -101,18 +101,18 @@ void ExecutionConfig::set_property(const AnyMap& config) { } } -bool ExecutionConfig::is_supported(const std::string& name) const { +bool OldExecutionConfig::is_supported(const std::string& name) const { bool supported = supported_properties.find(name) != supported_properties.end(); bool has_validator = property_validators.find(name) != property_validators.end(); return supported && has_validator; } -bool ExecutionConfig::is_set_by_user(const std::string& name) const { +bool OldExecutionConfig::is_set_by_user(const std::string& name) const { return user_properties.find(name) != user_properties.end(); } -void ExecutionConfig::set_user_property(const AnyMap& config) { +void OldExecutionConfig::set_user_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -124,7 +124,7 @@ void ExecutionConfig::set_user_property(const AnyMap& config) { } } -Any ExecutionConfig::get_property(const std::string& name) const { +Any OldExecutionConfig::get_property(const std::string& name) const { if (user_properties.find(name) != user_properties.end()) { return user_properties.at(name); } @@ -133,7 +133,7 @@ Any ExecutionConfig::get_property(const std::string& name) const { return internal_properties.at(name); } -void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if 
(is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ -149,7 +149,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { } } -void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { @@ -178,7 +178,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { } } -void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { @@ -187,7 +187,7 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { +void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); @@ -222,14 +222,14 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { } } -void ExecutionConfig::apply_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_execution_hints(info); apply_performance_hints(info); apply_priority_hints(info); apply_debug_options(info); } -void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { +void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) { // Copy internal properties before applying hints to ensure that // a property set by hint won't be overriden by a value in user config. 
// E.g num_streams=AUTO && hint=THROUGHPUT @@ -262,7 +262,7 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { user_properties.clear(); } -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { +void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); @@ -270,7 +270,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -std::string ExecutionConfig::to_string() const { +std::string OldExecutionConfig::to_string() const { std::stringstream s; s << "internal properties:\n"; for (auto& kv : internal_properties) { diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 0a3c49e6387104..9aa975d83923a3 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -22,11 +22,43 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } -void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - read_debug_options(device_info); - apply_user_properties(device_info); - apply_rt_info(device_info, rt_info); +void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); +} + +void NewExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + +void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { @@ -83,49 +115,5 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { - ov::AnyMap config_properties; - set_user_property(config_properties); - ov::AnyMap 
env_properties; - set_user_property(env_properties); -} - -void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index a1bb0ac8b6e6a0..930128ef53bff6 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "test_utils.h" @@ -23,6 +24,9 @@ TEST(config_test, basic) { std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); + cfg.finalize(ctx, {}); + std::cerr << cfg.to_string(); // std::cerr << get_prop() << std::endl; // std::cerr << get_prop() << std::endl; } From 1a02f3f713a2020ee240695703b383d3f81ac330 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 16:28:20 +0400 Subject: [PATCH 04/18] Replace old config & fixes Signed-off-by: Vladimir Paramuzov --- src/inference/CMakeLists.txt | 2 +- .../openvino/runtime/plugin_config.hpp | 25 ++++- src/inference/src/dev/plugin_config.cpp | 106 +++++++++++++++++- .../intel_gpu/runtime/execution_config.hpp | 3 +- .../intel_gpu/runtime/plugin_config.hpp | 6 + src/plugins/intel_gpu/src/graph/program.cpp | 13 ++- src/plugins/intel_gpu/src/plugin/plugin.cpp | 25 +++-- .../intel_gpu/src/runtime/plugin_config.cpp | 17 ++- 8 files changed, 171 insertions(+), 26 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 5f40c6fa54b6d0..1358a843a1863e 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -87,7 +87,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE # for ov_plugins.hpp $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>) 
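Taken together with the updated module test above, the intended call pattern for the new config is a single finalize() step instead of the old apply_user_properties() call. A minimal usage sketch follows; it is hedged, not test code from this patch: the RemoteContextImpl construction is simplified and template arguments are assumed.

    ov::intel_gpu::NewExecutionConfig cfg;
    cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
    auto ctx = std::make_shared<ov::intel_gpu::RemoteContextImpl>(
        "GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
    cfg.finalize(ctx, {});  // applies hints, device-specific defaults, rt_info and debug options
    auto prec = cfg.get_property(ov::hint::inference_precision);  // reads the finalized value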
-target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json) ov_mark_target_as_cc(${TARGET_NAME}_obj) # OpenVINO Runtime is public API => need to mark this library as important for ABI free diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index acccd0bf343604..16985d70b2841a 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -62,7 +62,8 @@ struct ConfigOption : public ConfigOptionBase { bool is_valid_value(ov::Any val) override { try { - return validator ? validator(val.as()) : true; + auto v = val.as(); + return validator ? validator(v) : true; } catch (std::exception&) { return false; } @@ -74,6 +75,7 @@ struct ConfigOption : public ConfigOptionBase { // Base class for configuration of plugins // Implementation should provide a list of properties with default values and validators (optional) +// and prepare a map string property name -> ConfigOptionBase pointer // For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class // and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) // get/set property members are provided to handle external property access @@ -91,9 +93,14 @@ struct ConfigOption : public ConfigOptionBase { class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} - PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } - explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); } - explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); } + virtual ~PluginConfig() = default; + + // Disable copy and move as we need to setup m_options_map properly and ensure that + // values are a part of current config object + PluginConfig(const PluginConfig& other) = delete; + PluginConfig& operator=(const PluginConfig& other) = delete; + PluginConfig(PluginConfig&& other) = delete; + PluginConfig& operator=(PluginConfig&& other) = delete; void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; @@ -118,9 +125,12 @@ class OPENVINO_RUNTIME_API PluginConfig { std::string to_string() const; void finalize(std::shared_ptr context, const ov::RTMap& rt_info); - virtual void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) = 0; protected: + virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} + virtual void apply_debug_options(std::shared_ptr context); + virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return user_properties.find(property.name()) != user_properties.end(); @@ -135,6 +145,11 @@ class OPENVINO_RUNTIME_API PluginConfig { } } } + + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; + ov::AnyMap read_env(const std::vector& prefixes) const; + void cleanup_unsupported(ov::AnyMap& config) const; + std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info diff --git a/src/inference/src/dev/plugin_config.cpp 
b/src/inference/src/dev/plugin_config.cpp index 9f169c07663a40..c3ac86e05ba04b 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -1,10 +1,20 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "openvino/runtime/plugin_config.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/device_id_parser.hpp" +#include "openvino/util/common_util.hpp" +#include "openvino/util/env_util.hpp" +#include <fstream> +#ifdef JSON_HEADER +# include <json.hpp> +#else +# include <nlohmann/json.hpp> +#endif namespace ov { @@ -15,7 +25,8 @@ void PluginConfig::set_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end()); + OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); it->second->set_any(val); } @@ -25,6 +36,7 @@ ov::Any PluginConfig::get_property(const std::string& name) const { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); return it->second->get_any(); } @@ -37,6 +49,7 @@ void PluginConfig::set_user_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name); user_properties[name] = val; @@ -44,6 +57,8 @@ void PluginConfig::set_user_property(const AnyMap& config) { } void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) { + apply_rt_info(context, rt_info); + apply_debug_options(context); // Copy internal properties before applying hints to ensure that // a property set by hint won't be overridden by a value in user config.
// E.g num_streams=AUTO && hint=THROUGHPUT @@ -54,12 +69,95 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R option->set_any(prop.second); } - finalize_impl(context, rt_info); + finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization user_properties.clear(); } +void PluginConfig::apply_debug_options(std::shared_ptr context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + ov::AnyMap env_properties = read_env({"OV_"}); + set_user_property(env_properties); +} + +ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { + ov::AnyMap config; + + std::ifstream ifs(filename); + if (!ifs.is_open()) { + return config; + } + + nlohmann::json json_config; + try { + ifs >> json_config; + } catch (const std::exception& e) { + return config; + } + + DeviceIDParser parser(target_device_name); + for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) { + const std::string& device_name = item.key(); + if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name()) + continue; + + const auto& item_value = item.value(); + for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) { + config[option.key()] = option.value().get(); + } + } + + return config; +} + +ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) const { + ov::AnyMap config; + + for (auto& kv : m_options_map) { + for (auto& prefix : prefixes) { + auto var_name = prefix + kv.first; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(kv.second) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + config[kv.first] = false; + } else if (on.count(val_lower)) { + config[kv.first] = true; + } else { + OPENVINO_THROW("Unexpected value for boolean property: ", val); + } + } else { + config[kv.first] = val; + } + break; + } + } + } + + return config; +} + +void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { + for (auto it = config.begin(); it != config.end();) { + const auto& known_options = m_options_map; + auto& name = it->first; + auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == known_options.end()) { + it = config.erase(it); + } else { + ++it; + } + } +} + std::string PluginConfig::to_string() const { std::stringstream s; @@ -67,7 +165,7 @@ std::string PluginConfig::to_string() const { s << "PROPERTIES:\n"; for (const auto& option : m_options_map) { - s << "\t" << option.first << ":" << option.second->get_any().as() << std::endl; + s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; for (const auto& user_prop : user_properties) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 2654affe2626f5..b9f2cdd27f8283 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -6,6 
+6,7 @@ #include "intel_gpu/runtime/internal_properties.hpp" #include "intel_gpu/runtime/device.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" namespace ov { namespace intel_gpu { @@ -169,7 +170,7 @@ class OldExecutionConfig { std::map property_validators; }; -using ExecutionConfig = OldExecutionConfig; +using ExecutionConfig = NewExecutionConfig; } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 6ea8f4e107bfc8..5931a60ffae37a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -15,6 +15,12 @@ namespace intel_gpu { struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(); + NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } + explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } + + NewExecutionConfig(const NewExecutionConfig& other); + NewExecutionConfig& operator=(const NewExecutionConfig& other); #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ ConfigOption PropertyVar = \ diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index bdffb9c4980722..33afd9edcf1d4f 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "impls/registry/implementation_manager.hpp" +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" @@ -161,7 +162,8 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); } else { build_program(is_internal); if (_is_body_program) { @@ -197,7 +199,8 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); init_primitives(); init_program(); prepare_nodes(nodes); @@ -210,7 +213,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); _layout_optimizer = cldnn::make_unique(); } @@ -496,7 +500,8 @@ void program::set_options() { void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 
5650f5a66a2ae6..8e238e9cb1aed5 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -25,6 +25,7 @@ #include "intel_gpu/runtime/device_query.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/visualize_tree.hpp" @@ -52,6 +53,16 @@ using Time = std::chrono::high_resolution_clock; namespace ov { namespace intel_gpu { +namespace { + +ov::RTMap get_rt_info(const ov::Model& model) { + if (model.has_rt_info("runtime_options")) + return model.get_rt_info("runtime_options"); + return {}; +} + +} // namespace + #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -189,9 +200,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options")); - config.apply_user_properties(context->get_engine().get_device_info()); + config.finalize(context, get_rt_info(*model)); set_cache_info(model, config); @@ -211,9 +220,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - + config.finalize(context_impl, get_rt_info(*model)); set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); @@ -280,9 +287,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options")); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -337,7 +342,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_property(ov::cache_mode); ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 9aa975d83923a3..330d3ed40c2175 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,6 +22,21 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } +NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + 
m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } +} + +NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + return *this; +} + void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { From 1fe84e25b38b8e248e5722a05f09ce6ea87b13f3 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 17:34:47 +0400 Subject: [PATCH 05/18] prefix for config members and unit tests Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 3 + src/inference/src/dev/plugin_config.cpp | 9 +- src/inference/tests/unit/config_test.cpp | 202 ++++++++++++++++++ .../intel_gpu/runtime/plugin_config.hpp | 2 +- .../intel_gpu/src/runtime/plugin_config.cpp | 2 +- .../tests/unit/module_tests/config_test.cpp | 3 +- 6 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 src/inference/tests/unit/config_test.cpp diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 16985d70b2841a..78f8da4fe61ca2 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -118,6 +118,9 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { + if (is_set_by_user(property)) { + return user_properties.at(property.name()).template as(); + } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; } diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c3ac86e05ba04b..c4489cdc1bc69f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -76,9 +76,12 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } void PluginConfig::apply_debug_options(std::shared_ptr context) { - ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); - cleanup_unsupported(config_properties); - set_user_property(config_properties); + if (context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + } + ov::AnyMap env_properties = read_env({"OV_"}); set_user_property(env_properties); } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp new file mode 100644 index 00000000000000..db832247dd2bd6 --- /dev/null +++ b/src/inference/tests/unit/config_test.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" + +#include +#include + +#include "common_test_utils/common_utils.hpp" + +using namespace ::testing; +using namespace ov; + +static constexpr Property unsupported_property{"UNSUPPORTED_PROPERTY"}; +static constexpr Property bool_property{"BOOL_PROPERTY"}; +static constexpr Property int_property{"INT_PROPERTY"}; +static constexpr Property high_level_property{"HIGH_LEVEL_PROPERTY"}; +static constexpr 
Property low_level_property{"LOW_LEVEL_PROPERTY"}; + + +struct EmptyTestConfig : public ov::PluginConfig { + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } +}; + +struct NotEmptyTestConfig : public ov::PluginConfig { + NotEmptyTestConfig() { + m_options_map[bool_property.name()] = &m_bool_property; + m_options_map[int_property.name()] = &m_int_property; + m_options_map[high_level_property.name()] = &m_high_level_property; + m_options_map[low_level_property.name()] = &m_low_level_property; + } + + NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + } + + ConfigOption m_bool_property = ConfigOption(true); + ConfigOption m_int_property = ConfigOption(-1); + ConfigOption m_high_level_property = ConfigOption(""); + ConfigOption m_low_level_property = ConfigOption(""); + + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } + + void finalize_impl(std::shared_ptr context) override { + if (!is_set_by_user(low_level_property)) { + m_low_level_property.value = m_high_level_property.value; + } + } + + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override { + apply_rt_info_property(high_level_property, rt_info); + } + + using ov::PluginConfig::is_set_by_user; +}; + +TEST(plugin_config, can_create_empty_config) { + ASSERT_NO_THROW( + EmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 0); + ); +} + +TEST(plugin_config, can_create_not_empty_config) { + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ); +} + +TEST(plugin_config, can_set_get_property) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.get_property(bool_property)); + ASSERT_EQ(cfg.get_property(bool_property), true); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.get_property(bool_property), false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, throw_for_unsupported_property) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); + ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); + ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); +} + +TEST(plugin_config, can_direct_access_to_properties) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_EQ(cfg.m_bool_property.value, false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called + + cfg.m_bool_property.value = true; + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, finalization_updates_member) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value 
until finalize() is called + + cfg.finalize(nullptr, {}); + + ASSERT_EQ(cfg.m_bool_property.value, false); // now the value has changed +} + +TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) { + NotEmptyTestConfig cfg; + + ASSERT_EQ(cfg.get_property(bool_property), true); // default value + ASSERT_EQ(cfg.m_bool_property.value, true); // default value + + cfg.m_bool_property.value = false; // update member directly + ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_TRUE(cfg.is_set_by_user(bool_property)); + ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned + ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated + + cfg.finalize(nullptr, {}); + ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization + ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared +} + +TEST(plugin_config, finalization_updates_dependant_properties) { + NotEmptyTestConfig cfg; + + cfg.set_user_property(high_level_property("value1")); + ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); + ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); + + cfg.finalize(nullptr, {}); + ASSERT_EQ(cfg.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg.m_low_level_property.value, "value1"); + ASSERT_FALSE(cfg.is_set_by_user(high_level_property)); + ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); +} + +TEST(plugin_config, can_set_property_from_rt_info) { + NotEmptyTestConfig cfg; + + RTMap rt_info = { + {high_level_property.name(), "value1"}, + {int_property.name(), 10} // int_property is not applied from rt info + }; + + // default values + ASSERT_EQ(cfg.m_high_level_property.value, ""); + ASSERT_EQ(cfg.m_low_level_property.value, ""); + ASSERT_EQ(cfg.m_int_property.value, -1); + + cfg.finalize(nullptr, rt_info); + + ASSERT_EQ(cfg.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependant is updated too + ASSERT_EQ(cfg.m_int_property.value, -1); // still default +} + +TEST(plugin_config, can_copy_config) { + NotEmptyTestConfig cfg1; + + cfg1.m_high_level_property.value = "value1"; + cfg1.m_low_level_property.value = "value2"; + cfg1.m_int_property.value = 1; + cfg1.set_user_property(bool_property(false)); + + NotEmptyTestConfig cfg2 = cfg1; + ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg2.m_low_level_property.value, "value2"); + ASSERT_EQ(cfg2.m_int_property.value, 1); + ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too + + // check that cfg1 modification doesn't impact a copy + cfg1.set_property(high_level_property("value3")); + cfg1.m_int_property.value = 3; + ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg2.m_int_property.value, 1); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 5931a60ffae37a..f18b32cd8b7cbb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -23,7 +23,7 @@ struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig& operator=(const NewExecutionConfig& other); #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ - ConfigOption PropertyVar = \ + ConfigOption m_ ## PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); #include "options_release.inl" diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 330d3ed40c2175..8f4319734d3e9f 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -14,7 +14,7 @@ namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; #include "intel_gpu/runtime/options_release.inl" #include "intel_gpu/runtime/options_debug.inl" diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index 930128ef53bff6..b14c5b0bf4623d 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -14,10 +14,11 @@ TEST(config_test, basic) { ov::intel_gpu::NewExecutionConfig cfg; std::cerr << cfg.to_string(); + std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; std::cerr << cfg.to_string(); From 6dbd32ebcec57258269933932e9dafef1e9c5fa9 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:25:26 +0400 Subject: [PATCH 06/18] added visibility for options Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 50 ++++++++++++- src/inference/src/dev/plugin_config.cpp | 43 ++++++----- src/inference/tests/unit/config_test.cpp | 45 +++++++++--- .../include/intel_gpu/graph/program.hpp | 4 +- .../intel_gpu/runtime/internal_properties.hpp | 8 +-- .../include/intel_gpu/runtime/options.inl | 71 +++++++++++++++++++ .../intel_gpu/runtime/options_debug.inl | 31 -------- .../intel_gpu/runtime/options_release.inl | 50 ------------- .../intel_gpu/runtime/plugin_config.hpp | 9 +-- .../intel_gpu/src/plugin/ops/condition.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 1 - .../src/runtime/execution_config.cpp | 7 -- .../intel_gpu/src/runtime/plugin_config.cpp | 8 +-- .../tests/unit/fusions/gemm_fusion_test.cpp | 1 - .../tests/unit/fusions/loop_fusion_test.cpp | 1 - .../tests/unit/test_cases/crop_gpu_test.cpp | 1 - .../tests/unit/test_cases/loop_gpu_test.cpp | 5 +- 17 files changed, 189 insertions(+), 147 deletions(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 78f8da4fe61ca2..36b6765849ee8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,8 +33,40 @@ #define GET_EXCEPT_LAST(...) 
EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; + +#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + +#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + namespace ov { +enum class OptionVisibility { + RELEASE = 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only + DEBUG = 2, // Option can be set for debug builds only via environment and config file +}; + +inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) { + switch (visibility) { + case OptionVisibility::RELEASE: os << "RELEASE"; break; + case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; + case OptionVisibility::DEBUG: os << "DEBUG"; break; + default: os << "UNKNOWN"; break; + } + + return os; +} + struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -42,13 +74,15 @@ struct ConfigOptionBase { virtual void set_any(const ov::Any any) = 0; virtual ov::Any get_any() const = 0; virtual bool is_valid_value(ov::Any val) = 0; + virtual OptionVisibility get_visibility() const = 0; }; -template +template struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; + constexpr static const auto visibility = visibility_; void set_any(const ov::Any any) override { if (validator) @@ -69,6 +103,10 @@ struct ConfigOption : public ConfigOptionBase { } } + OptionVisibility get_visibility() const override { + return visibility; + } + private: std::function validator; }; @@ -139,6 +177,14 @@ class OPENVINO_RUNTIME_API PluginConfig { return user_properties.find(property.name()) != user_properties.end(); } + ConfigOptionBase* get_option_ptr(const std::string& name) const { + auto it = m_options_map.find(name); + OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); + + return it->second; + } + template void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { if (!is_set_by_user(property)) { @@ -149,6 +195,8 @@ class OPENVINO_RUNTIME_API PluginConfig { } } + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; void cleanup_unsupported(ov::AnyMap& config) const; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c4489cdc1bc69f..cfc48745f677f5 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ 
b/src/inference/src/dev/plugin_config.cpp @@ -23,34 +23,31 @@ void PluginConfig::set_property(const AnyMap& config) { auto& name = kv.first; auto& val = kv.second; - const auto& known_options = m_options_map; - auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); - OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); - - it->second->set_any(val); + auto option = get_option_ptr(name); + option->set_any(val); } } ov::Any PluginConfig::get_property(const std::string& name) const { - const auto& known_options = m_options_map; - auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); - OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); - - return it->second->get_any(); + auto option = get_option_ptr(name); + return option->get_any(); } void PluginConfig::set_user_property(const AnyMap& config) { + static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE}; + set_user_property(config, allowed_visibility); +} + +void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; - const auto& known_options = m_options_map; - auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); - OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); - OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name); + auto option = get_option_ptr(name); + if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { + OPENVINO_THROW("Unknown property: ", name); + } + OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name); user_properties[name] = val; } @@ -76,14 +73,22 @@ void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::R } void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) { + static std::vector<OptionVisibility> allowed_visibility = { + OptionVisibility::RELEASE, + OptionVisibility::RELEASE_INTERNAL, +#ifdef ENABLE_DEBUG_CAPS + OptionVisibility::DEBUG +#endif + }; + if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties); + set_user_property(config_properties, allowed_visibility); } ov::AnyMap env_properties = read_env({"OV_"}); - set_user_property(env_properties); + set_user_property(env_properties, allowed_visibility); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index db832247dd2bd6..0feeef707a2779 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -8,8 +8,6 @@ #include #include -#include "common_test_utils/common_utils.hpp" - using namespace ::testing; using namespace ov; @@ -18,6 +16,8 @@ static constexpr Property bool_property{"BOOL_PROP static constexpr Property int_property{"INT_PROPERTY"}; static constexpr
Property high_level_property{"HIGH_LEVEL_PROPERTY"}; static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; +static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; +static constexpr Property debug_property{"DEBUG_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -32,10 +32,15 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - m_options_map[bool_property.name()] = &m_bool_property; - m_options_map[int_property.name()] = &m_int_property; - m_options_map[high_level_property.name()] = &m_high_level_property; - m_options_map[low_level_property.name()] = &m_low_level_property; + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION + } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { @@ -45,10 +50,14 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - ConfigOption m_bool_property = ConfigOption(true); - ConfigOption m_int_property = ConfigOption(-1); - ConfigOption m_high_level_property = ConfigOption(""); - ConfigOption m_low_level_property = ConfigOption(""); + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION std::vector get_supported_properties() const { std::vector supported_properties; @@ -68,6 +77,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { apply_rt_info_property(high_level_property, rt_info); } + using ov::PluginConfig::get_option_ptr; using ov::PluginConfig::is_set_by_user; }; @@ -81,7 +91,7 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ASSERT_EQ(cfg.get_supported_properties().size(), 6); ); } @@ -200,3 +210,16 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); ASSERT_EQ(cfg2.m_int_property.value, 1); } + +TEST(plugin_config, set_user_property_throw_for_non_release_options) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); +} + +TEST(plugin_config, visibility_is_correct) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); + ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 379d7b3b64a222..acee6df2288a74 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -304,8 +304,8 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 56a7bf25acc998..10e58acee25cf8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -46,21 +46,19 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; +static constexpr Property log_to_file{"LOG_TO_FILE"}; static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; static constexpr Property dump_sources{"DUMP_SOURCES"}; @@ -70,11 +68,11 @@ static constexpr Property dump_iteratio static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl new file mode 100644 index 00000000000000..d5da1edf81bd69 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -0,0 +1,71 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description +OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device") +OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") +OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") +OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism") +OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") +OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilation and inference as well as device queue settings") +OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") +OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. 
Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. 
May be in-order or out-of-order") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls if weights tensors can be reordered during model compilation to more friendly layout for specific kernel") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector<std::string>{}, "List of output primitive names") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from model compilation process which allows faster execution graph dumping") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn usage for particular model/platform") + +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save CSV file with per-stage and per-primitive profiling data to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save CSV file with memory pool info to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space-separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the inference until all host work is done and plugin is ready to block thread on the final clFinish() call") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient OCL compilation") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows asynchronous preparation of static-shaped implementations for the primitives with shape-agnostic kernels selected 
during compilation") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl deleted file mode 100644 index 62548a7abb17fd..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifdef GPU_DEBUG_CONFIG - -OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") -OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "") -OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") - -#endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl deleted file mode 100644 index b3aa12dc75c49b..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -// Namespace, property name, default value, [validator], description -OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") -OV_CONFIG_OPTION(ov::device, id, "0", "ID of the 
current device") -OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") -OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") -OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") -OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") -OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") -OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") -OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") -OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") -OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") - -OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") -OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") -OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") -OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "") -OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "") -OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") -OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") -OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") -OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") -OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") -OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") - -OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "") - -OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "") 
-OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "") -OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index f18b32cd8b7cbb..19a3c1e468e28c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -22,13 +22,8 @@ struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(const NewExecutionConfig& other); NewExecutionConfig& operator=(const NewExecutionConfig& other); - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - ConfigOption m_ ## PropertyVar = \ - ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "options_release.inl" - #include "options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context) override; diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 4b7b3748d6e69d..c72dc9d11e00a8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -28,7 +28,6 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); } } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index bb11308064f7e7..e1bc6055fd4349 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -300,7 +300,6 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), std::make_tuple(ov::intel_gpu::optimize_data, false), std::make_tuple(ov::intel_gpu::enable_memory_pool, true), @@ -79,7 +77,6 @@ void OldExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), std::make_tuple(ov::intel_gpu::partial_build_program, false), std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), std::make_tuple(ov::intel_gpu::use_onednn, false)); @@ -202,10 +199,6 
@@ void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { set_property(ov::enable_profiling(true)); } - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - } - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { if (debug_config->dynamic_quantize_group_size == -1) set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 8f4319734d3e9f..5eff06155280b1 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -13,12 +13,8 @@ namespace ov { namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; - - #include "intel_gpu/runtime/options_release.inl" - #include "intel_gpu/runtime/options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index 08b63e0a8326b8..92bc97693f019c 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -336,7 +336,6 @@ TEST_P(gemm_2in_add, eltwise_postop_dynamic) { if (engine.get_device_info().supports_immad) { ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn }; cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } })); - cfg_fused.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); } auto add_data_layout = get_output_layout(p); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp index 1fbd1c096e7c6e..0b8c1b153c8f5d 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp @@ -55,7 +55,6 @@ program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); return program::build_program(engine, body_topology, config, false, false, true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 20d42e85d0c301..ad8cd9648cbc24 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1317,7 +1317,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) { } } } - config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); network network2(engine, topology, config2); // run with static kernel network2.set_input_data("input", input); auto outputs2 = network2.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index e9d55960cf568f..ca33241e31d248 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); return program::build_program(engine, body_topology, config, false, false, true); @@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout, permute("permute1", input_info("input_origin"), {0, 1, 2, 3}), concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0), loop("loop", - {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, + {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, body_program, trip_count_id, initial_condition_id, actual_iteration_count_id, input_primitive_maps, output_primitive_maps, back_edges, num_iterations, body_current_iteration_id, body_execution_condition_id, 2), @@ -1105,7 +1104,7 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - + std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { From c39f5c7203c4335affb89032602f004cddc510e0 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:45:22 +0400 Subject: [PATCH 07/18] remove old config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 170 ++---------- .../intel_gpu/runtime/plugin_config.hpp | 41 --- .../src/runtime/execution_config.cpp | 250 ++++-------------- .../intel_gpu/src/runtime/plugin_config.cpp | 130 --------- .../tests/unit/module_tests/config_test.cpp | 33 --- 5 files changed, 68 insertions(+), 556 deletions(-) delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp delete mode 100644 src/plugins/intel_gpu/src/runtime/plugin_config.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index b9f2cdd27f8283..85ef9d23aa96bd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,179 +1,45 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov { namespace intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline 
std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } +struct ExecutionConfig : public ov::PluginConfig { + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } - return os; -} + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class OldExecutionConfig { -public: - OldExecutionConfig(); - OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } - explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } - - void set_default(); - void set_property(const ov::AnyMap& properties); - void set_user_property(const ov::AnyMap& properties); - Any get_property(const std::string& name) const; - bool is_set_by_user(const std::string& name) const; - bool is_supported(const std::string& name) const; - void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); - - template ::type = true> - void register_property_impl() { } - - template - void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared>()); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... 
properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if, ValidatorT>::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - void register_property(PropertyInitializer&&... properties) { - register_property_impl(properties...); - } - - template - util::EnableIfAllStringAny set_property(Properties&&... properties) { - set_property(ov::AnyMap{std::forward(properties)...}); - } - - template - util::EnableIfAllStringAny set_user_property(Properties&&... properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } - void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } - -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; }; -using ExecutionConfig = NewExecutionConfig; } // namespace intel_gpu } // namespace ov namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp deleted file mode 100644 index 19a3c1e468e28c..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/plugin_config.hpp" -#include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include - -namespace ov { -namespace intel_gpu { - -struct NewExecutionConfig : public ov::PluginConfig { - NewExecutionConfig(); - NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { 
set_property(ov::AnyMap(values)); } - explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } - explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } - - NewExecutionConfig(const NewExecutionConfig& other); - NewExecutionConfig& operator=(const NewExecutionConfig& other); - - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION - - void finalize_impl(std::shared_ptr context) override; - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; - -private: - void apply_user_properties(const cldnn::device_info& info); - void apply_hints(const cldnn::device_info& info); - void apply_execution_hints(const cldnn::device_info& info); - void apply_performance_hints(const cldnn::device_info& info); - void apply_priority_hints(const cldnn::device_info& info); -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 2c4bf66bc4f48d..e0fa96f2f74be7 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,136 +1,78 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" -#include namespace ov { namespace intel_gpu { -OldExecutionConfig::OldExecutionConfig() { - set_default(); +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION } -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } -}; - -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; - } -}; - -void OldExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, 0.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - 
std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; -} - -void OldExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } + return *this; } -bool OldExecutionConfig::is_supported(const std::string& name) const { - bool supported = supported_properties.find(name) != supported_properties.end(); - bool has_validator = property_validators.find(name) != property_validators.end(); - - return supported && has_validator; -} - -bool OldExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); +void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -void OldExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); +void ExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } - user_properties[kv.first] = kv.second; + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); } -} -Any OldExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return user_properties.at(name); + // Enable dynamic quantization by default for 
non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); } +} - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); +void ExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } -void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ -146,7 +88,7 @@ void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { @@ -175,7 +117,7 @@ void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) } } -void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { @@ -184,97 +126,5 @@ void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } - - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); - } - - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); - } - - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - } - - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - } - } -} - -void OldExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); - apply_debug_options(info); -} - -void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. 
- // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; - } - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } - - user_properties.clear(); -} - -void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -std::string OldExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - return s.str(); -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp deleted file mode 100644 index 5eff06155280b1..00000000000000 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/runtime/plugin_config.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "openvino/core/any.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" - - -namespace ov { -namespace intel_gpu { - -NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION -} - -NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } -} - -NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } - return *this; -} - -void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -void NewExecutionConfig::finalize_impl(std::shared_ptr context) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); - if (!is_set_by_user(ov::hint::inference_precision)) { - if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); - } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { - if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); - else - set_property(ov::hint::inference_precision(ov::element::f32)); - } - } - } -} - -void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); - if (!is_set_by_user(ov::num_streams)) { - if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); - } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); - } - } - } - - if (get_property(ov::num_streams) == ov::streams::AUTO) { - int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); - } - - if (get_property(ov::internal::exclusive_async_requests)) { - 
set_property(ov::num_streams(1)); - } - - // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); - } - } -} - -void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); - if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); - } - } -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp deleted file mode 100644 index b14c5b0bf4623d..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2022-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" -#include "openvino/runtime/properties.hpp" -#include "test_utils.h" - -using namespace cldnn; -using namespace ::tests; - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - - auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); - cfg.finalize(ctx, {}); - std::cerr << cfg.to_string(); -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 8eac659c4de17cc941f4f20c76b85d938ea0015a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 15:46:55 +0400 Subject: [PATCH 08/18] enhancements Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 25 ++++++++++-- src/inference/src/dev/plugin_config.cpp | 39 ++++++++++++------- src/inference/tests/unit/config_test.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 3 +- .../src/runtime/execution_config.cpp | 4 +- 5 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 36b6765849ee8e..769a4619b60fe8 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -107,6 +107,23 @@ struct ConfigOption : public ConfigOptionBase { return visibility; } + operator T() const { + return value; + } + + ConfigOption& operator=(const T& val) { + value = val; + return *this; + } + + bool operator==(const T& val) const { + return value == val; + } + + bool operator!=(const T& val) const { + return !(*this == val); + } + private: std::function validator; }; @@ -157,7 +174,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { - return user_properties.at(property.name()).template as(); + return 
m_user_properties.at(property.name()).template as(); } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; @@ -174,7 +191,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template bool is_set_by_user(const ov::Property& property) const { - return user_properties.find(property.name()) != user_properties.end(); + return m_user_properties.find(property.name()) != m_user_properties.end(); } ConfigOptionBase* get_option_ptr(const std::string& name) const { @@ -195,7 +212,7 @@ class OPENVINO_RUNTIME_API PluginConfig { } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -204,7 +221,7 @@ class OPENVINO_RUNTIME_API PluginConfig { std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info - ov::AnyMap user_properties; + ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index cfc48745f677f5..27d113a04a88cd 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -34,22 +34,31 @@ ov::Any PluginConfig::get_property(const std::string& name) const { } void PluginConfig::set_user_property(const AnyMap& config) { - static std::vector allowed_visibility = {OptionVisibility::RELEASE}; - set_user_property(config, allowed_visibility); + const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; + const bool throw_on_error = true; + set_user_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility) { +void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { - OPENVINO_THROW("Unkown property: ", name); + if (throw_on_error) + OPENVINO_THROW("Unkown property: ", name); + else + continue; + } + if (!option->is_valid_value(val)) { + if (throw_on_error) + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + else + continue; } - OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as(), " for property: ", name); - user_properties[name] = val; + m_user_properties[name] = val; } } @@ -61,7 +70,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // E.g num_streams=AUTO && hint=THROUGHPUT // If we apply hints first and then copy all values from user config to internal one, // then we'll get num_streams=AUTO in final config while some integer number is expected. 
- for (const auto& prop : user_properties) { + for (const auto& prop : m_user_properties) { auto& option = m_options_map.at(prop.first); option->set_any(prop.second); } @@ -69,7 +78,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization - user_properties.clear(); + m_user_properties.clear(); } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -81,14 +90,17 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) #endif }; + const bool throw_on_error = false; + if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility); + set_user_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); - set_user_property(env_properties, allowed_visibility); + cleanup_unsupported(env_properties); + set_user_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -155,10 +167,9 @@ ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) cons void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { for (auto it = config.begin(); it != config.end();) { - const auto& known_options = m_options_map; auto& name = it->first; - auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - if (opt_it == known_options.end()) { + auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == m_options_map.end()) { it = config.erase(it); } else { ++it; @@ -176,7 +187,7 @@ std::string PluginConfig::to_string() const { s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; - for (const auto& user_prop : user_properties) { + for (const auto& user_prop : m_user_properties) { s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 0feeef707a2779..fa09be0616c8d4 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -44,7 +44,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 2aee524ac2e3e1..dc6ce73e8c585d 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp 
b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index e0fa96f2f74be7..5170bc48b955d7 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -19,14 +19,14 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { } ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } } ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } From 3ffe4170dd6968a7bbd55b6af4a04b6a2a7298ef Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 10:27:54 +0400 Subject: [PATCH 09/18] update behavior for set/get property. Add help message Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 36 +++-- src/inference/src/dev/plugin_config.cpp | 134 ++++++++++++++++-- src/inference/tests/unit/config_test.cpp | 32 ++--- .../include/intel_gpu/plugin/plugin.hpp | 1 - .../intel_gpu/runtime/execution_config.hpp | 3 +- .../include/intel_gpu/runtime/options.inl | 2 +- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 4 +- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 +- .../impls/onednn/primitive_onednn_base.h | 8 +- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +- .../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +- src/plugins/intel_gpu/src/graph/program.cpp | 44 +++--- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_nd_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_update.cpp | 2 +- src/plugins/intel_gpu/src/graph/select.cpp | 2 +- .../intel_gpu/src/graph/strided_slice.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 22 +-- src/plugins/intel_gpu/src/plugin/graph.cpp | 20 ++- .../intel_gpu/src/plugin/ops/condition.cpp | 6 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 4 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 49 +++---- .../intel_gpu/src/plugin/program_builder.cpp | 18 +-- .../src/plugin/sync_infer_request.cpp | 8 +- .../src/plugin/transformations_pipeline.cpp | 14 +- .../src/runtime/execution_config.cpp | 46 ++++-- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_stream.cpp | 8 +- src/plugins/intel_gpu/src/runtime/stream.cpp | 4 +- .../test_cases/fully_connected_gpu_test.cpp | 24 ++-- 48 
files changed, 339 insertions(+), 224 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 769a4619b60fe8..a1bcab62b5d5fd 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,12 +33,26 @@ #define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_LAST_IMPL_0(_0, ...) _0 +#define GET_LAST_IMPL_1(_0, _1, ...) _1 +#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2 +#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3 +#define GET_LAST_IMPL_4(_0, _1, _2, _3, _4, ...) _4 +#define GET_LAST_IMPL_5(_0, _1, _2, _3, _4, _5, ...) _5 +#define GET_LAST_IMPL_6(_0, _1, _2, _3, _4, _5, _6, ...) _6 + +#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) + #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; +#define OV_CONFIG_OPTION_HELP(PropertyNamespace, PropertyVar, Visibility, DefaultValue, ...) \ + { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, + #define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) @@ -159,18 +173,12 @@ class OPENVINO_RUNTIME_API PluginConfig { void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; - void set_user_property(const ov::AnyMap& properties); template util::EnableIfAllStringAny set_property(Properties&&... properties) { set_property(ov::AnyMap{std::forward(properties)...}); } - template - util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { @@ -189,6 +197,7 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -207,12 +216,13 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); + set_property(property(rt_info_val->second.template as())); } } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); + ov::Any get_property(const std::string& name, const std::vector& allowed_visibility) const; + void set_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -223,6 +233,16 @@ class OPENVINO_RUNTIME_API PluginConfig { // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; + + // property variable name, string name, default value, description + using OptionsDesc = std::vector>; + static OptionsDesc m_options_desc; + virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } + const std::string get_help_message(const std::string& name = "") const; + void print_help() const; + +private: + bool m_is_finalized = false; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 27d113a04a88cd..e1b09b76ad8235 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -8,7 +8,9 @@ #include "openvino/runtime/device_id_parser.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" +#include #include +#include #ifdef JSON_HEADER # include @@ -16,30 +18,63 @@ # include #endif -namespace ov { - -void PluginConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; +#ifdef _WIN32 +#include +#else +#include +#include +#endif - auto option = get_option_ptr(name); - option->set_any(val); +namespace { +size_t get_terminal_width() { + const size_t default_width = 120; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + return csbi.srWindow.Right - csbi.srWindow.Left + 1; + } else { + return default_width; + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + return w.ws_col; + } else { + return default_width; } +#endif // _WIN32 } +} + +namespace ov { ov::Any PluginConfig::get_property(const std::string& name) const { + const static std::vector allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL}; + return get_property(name, allowed_visibility); +} + +ov::Any PluginConfig::get_property(const std::string& name, const std::vector& allowed_visibility) const { + if 
(m_user_properties.find(name) != m_user_properties.end()) { + return m_user_properties.at(name); + } + auto option = get_option_ptr(name); + if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { + OPENVINO_THROW("Couldn't get unknown property: ", name); + } + return option->get_any(); } -void PluginConfig::set_user_property(const AnyMap& config) { +void PluginConfig::set_property(const AnyMap& config) { const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; const bool throw_on_error = true; - set_user_property(config, allowed_visibility, throw_on_error); + set_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -47,13 +82,13 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { if (throw_on_error) - OPENVINO_THROW("Unkown property: ", name); + OPENVINO_THROW("Couldn't set unknown property: ", name); else continue; } if (!option->is_valid_value(val)) { if (throw_on_error) - OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name, "\nProperty description: ", get_help_message(name)); else continue; } @@ -79,6 +114,8 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization m_user_properties.clear(); + + m_is_finalized = true; } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -95,12 +132,12 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility, throw_on_error); + set_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_user_property(env_properties, allowed_visibility, throw_on_error); + set_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -194,4 +231,71 @@ std::string PluginConfig::to_string() const { return s.str(); } +void PluginConfig::print_help() const { + auto format_text = [](const std::string& cpp_name, const std::string& str_name, const std::string& desc, size_t max_name_width, size_t max_width) { + std::istringstream words(desc); + std::ostringstream formatted_text; + std::string word; + std::vector words_vec; + + while (words >> word) { + words_vec.push_back(word); + } + + size_t j = 0; + size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width; + for (size_t i = 0 ; i < std::max(2, count_of_desc_lines); i++) { + if (i == 0) { + formatted_text << std::left << std::setw(max_name_width) << 
cpp_name; + } else if (i == 1) { + formatted_text << std::left << std::setw(max_name_width) << str_name; + } else { + formatted_text << std::left << std::setw(max_name_width) << ""; + } + + formatted_text << " | "; + + size_t line_length = max_name_width + 3; + for (; j < words_vec.size();) { + line_length += words_vec[j].size() + 1; + if (line_length > max_width) { + break; + } else { + formatted_text << words_vec[j] << " "; + } + j++; + } + formatted_text << "\n"; + } + return formatted_text.str(); + }; + + const auto& options_desc = get_options_desc(); + std::stringstream ss; + auto max_name_length_item = std::max_element(options_desc.begin(), options_desc.end(), + [](const OptionsDesc::value_type& a, const OptionsDesc::value_type& b){ + return std::get<0>(a).size() < std::get<0>(b).size(); + }); + + const size_t max_name_width = static_cast(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size()); + const size_t terminal_width = get_terminal_width(); + ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n"; + ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n"; + for (auto& kv : options_desc) { + ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n"; + } + + std::cout << ss.str(); +} + +const std::string PluginConfig::get_help_message(const std::string& name) const { + const auto& options_desc = get_options_desc(); + auto it = std::find_if(options_desc.begin(), options_desc.end(), [&](const OptionsDesc::value_type& v) { return std::get<1>(v) == name; }); + if (it != options_desc.end()) { + return std::get<2>(*it); + } + + return ""; +} + } // namespace ov diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index fa09be0616c8d4..42b7fba115a273 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -101,35 +101,27 @@ TEST(plugin_config, can_set_get_property) { ASSERT_EQ(cfg.get_property(bool_property), true); ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.get_property(bool_property), false); - - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.get_property(bool_property), true); } TEST(plugin_config, throw_for_unsupported_property) { NotEmptyTestConfig cfg; ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); - ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); } TEST(plugin_config, can_direct_access_to_properties) { NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_NO_THROW(cfg.set_property(bool_property(false))); - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_EQ(cfg.m_bool_property.value, false); + ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property)); + ASSERT_NO_THROW(cfg.set_property(int_property(1))); + ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called - - cfg.m_bool_property.value = true; - ASSERT_EQ(cfg.get_property(bool_property), true); + cfg.m_int_property.value = 2; + ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was 
set previously } TEST(plugin_config, finalization_updates_member) { NotEmptyTestConfig cfg; - ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called cfg.finalize(nullptr, {}); @@ -146,7 +138,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se cfg.m_bool_property.value = false; // update member directly ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_NO_THROW(cfg.set_property(bool_property(true))); ASSERT_TRUE(cfg.is_set_by_user(bool_property)); ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated @@ -159,7 +151,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se TEST(plugin_config, finalization_updates_dependant_properties) { NotEmptyTestConfig cfg; - cfg.set_user_property(high_level_property("value1")); + cfg.set_property(high_level_property("value1")); ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); @@ -196,7 +188,7 @@ TEST(plugin_config, can_copy_config) { cfg1.m_high_level_property.value = "value1"; cfg1.m_low_level_property.value = "value2"; cfg1.m_int_property.value = 1; - cfg1.set_user_property(bool_property(false)); + cfg1.set_property(bool_property(false)); NotEmptyTestConfig cfg2 = cfg1; ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); @@ -211,10 +203,10 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_int_property.value, 1); } -TEST(plugin_config, set_user_property_throw_for_non_release_options) { +TEST(plugin_config, set_property_throw_for_non_release_options) { NotEmptyTestConfig cfg; - ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); - ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); + ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_property(debug_property(10))); } TEST(plugin_config, visibility_is_correct) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 49a45ec9ffa11a..28a20fa737da76 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -44,7 +44,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 85ef9d23aa96bd..e7246662b06500 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -26,10 +26,11 @@ struct ExecutionConfig : public ov::PluginConfig { #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION +protected: void finalize_impl(std::shared_ptr context) override; void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + const 
ov::PluginConfig::OptionsDesc& get_options_desc() const override; -private: void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index d5da1edf81bd69..1941aaec69b2bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -9,7 +9,7 @@ OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism") OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision. Supported values: { f16, f32, undefined }") OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings") OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact the number of streams, auto batching, etc.") OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 3af0300602d7bf..38b0795ceddcc3 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index e3ff36ceae38a5..7d373d1e6c3a92 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
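Note: each options.inl entry above ends with its description string, and that last argument is exactly what the GET_LAST machinery added earlier in this patch peels off for OV_CONFIG_OPTION_HELP (while GET_EXCEPT_LAST feeds the rest to the member declaration). A simplified, self-contained sketch of the trick; the real macro shifts arguments through a placeholder and a longer comma tail, which this version omits:

    #include <cstdio>

    #define EXPAND(x) x
    #define CAT(a, b) a ## b
    // Count 1..5 arguments by sliding a reversed number list into slot N.
    #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N
    #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1))

    #define GET_LAST_IMPL_2(_0, _1, ...) _1
    #define GET_LAST_IMPL_3(_0, _1, _2, ...) _2
    #define GET_LAST_IMPL_4(_0, _1, _2, _3, ...) _3
    #define GET_LAST_IMPL(N, ...) EXPAND(CAT(GET_LAST_IMPL_, N)(__VA_ARGS__))
    #define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)

    int main() {
        // Three arguments -> COUNT(...) is 3 -> GET_LAST_IMPL_3 picks the third,
        // i.e. the description string of an option declaration.
        std::puts(GET_LAST(1, 2.0f, "the help text"));
    }

This is how a single OV_CONFIG_RELEASE_OPTION(...) line can feed both the member declaration and the help table without repeating the description.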
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index a370e8ba260f8b..7a3b7b6b5b93ec 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 6a361563653092..549850560e903d 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 462809268db88a..3e0e8aa2f61bf5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().m_optimize_data; auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..999e103c3fe200 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().m_partial_build_program) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index 2f2015c6f8a303..692f767926520c 100644 --- 
a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().m_custom_outputs.value; if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index a4129800733875..3efc2ba341596a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -143,8 +143,8 @@ propagate_constants::calculate(engine& engine, return {}; ExecutionConfig cf_config = config; - cf_config.set_property(ov::intel_gpu::optimize_data(false)); - cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.m_optimize_data = false; + cf_config.m_custom_outputs = const_outputs; network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index fcd6dab33754fd..ac7714d1b60542 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 110444c2c6255c..915a0ce6167c49 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 42d83a0265d290..1c47853dac82d5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1164,13 +1164,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.m_force_implementations.value; if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 5db452dcda26f0..18ea23e5843223 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.m_allow_new_shape_infer && + (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); + return !_config.m_cache_dir.value.empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return _config.m_max_kernels_per_batch; } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 275748da311081..01545a0305afdd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const 
ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.m_allow_new_shape_infer) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..b135d9af73f31f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index bc944cdc5ac5c9..4f6f7dc12868c1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -60,7 +60,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().m_num_streams.value); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 5262e8c4621e72..a2cacf9724f33d 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 37152b0d9e3b4f..7547fa5952b800 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().m_enable_profiling) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), 
program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { if (!_internal) { net_id = get_unique_net_id(); } @@ -364,7 +364,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * _config.m_num_streams.value; // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index ba1cbbdf7816dc..94a85cfbace47a 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index 2e4c792729f306..c01cb15e7bba62 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
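A note on the mixed call-site styles in these hunks: scalar reads like config.m_enable_profiling are used bare, while config.m_num_streams.value and m_cache_dir.value appear where a member function or a specific overload is needed. That is consistent with the option wrapper exposing an implicit conversion, roughly like this sketch (assumed shape, not the actual ConfigOption):

    #include <string>

    template <typename T>
    struct OptionSketch {
        T value;
        operator const T&() const { return value; }  // implicit read access
    };

    int main() {
        OptionSketch<bool> enable_profiling{true};
        OptionSketch<std::string> cache_dir{"/tmp/ov_cache"};

        bool profiling = enable_profiling;  // conversion is enough here
        // Member calls on the wrapped type still need the explicit field,
        // since C++ has no operator-dot forwarding:
        bool has_cache = !cache_dir.value.empty();
        return profiling && has_cache ? 0 : 1;
    }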
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0737362405ff9c..0016b026558778 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().m_enable_memory_pool) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2029,7 +2029,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2577,8 +2577,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().m_enable_profiling), + ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 33afd9edcf1d4f..bfb2092ceb8d33 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? 
num_streams : config.m_compilation_num_threads.value; + auto priority = config.m_host_task_priority; auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.m_enable_cpu_pinning; ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -215,7 +215,7 @@ program::program(engine& engine, const ExecutionConfig& config) init_primitives(); auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); _config.finalize(ctx, {}); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; _layout_optimizer = cldnn::make_unique(); } @@ -227,14 +227,14 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -488,13 +488,13 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); + if (!_config.m_force_implementations.value.empty()) { + _config.m_optimize_data = true; } GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); + _config.m_dump_graphs = debug_config->dump_graphs; } } @@ -532,7 +532,7 @@ void program::init_graph() { if (!node->is_type()) node->get_output_layouts(); if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); + _config.m_use_onednn = true; } } // Perform initial shape_of subgraphs markup @@ -551,7 +551,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (optimize_data) { apply_opt_pass(); } @@ -628,7 +628,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (!is_internal) { apply_opt_pass(rf); @@ -636,7 +636,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? 
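The _config.finalize(ctx, {}) call in the program constructor above is the pivot of the refactor: pending properties are merged into the typed members once, after which every read is a plain member load. A toy model of that lifecycle, matching the "Setting property after config finalization is prohibited" assert added in plugin_config.cpp (all names here are illustrative):

    #include <stdexcept>

    struct ToyConfig {
        bool allow_new_shape_infer = false;  // the typed member
        bool finalized = false;

        void set(bool v) {
            if (finalized)  // mirrors the m_is_finalized assert
                throw std::runtime_error("set after finalize() is prohibited");
            allow_new_shape_infer = v;
        }
        void finalize() { finalized = true; }  // hints applied here, then frozen
    };

    int main() {
        ToyConfig cfg;
        cfg.set(true);   // fine: not finalized yet
        cfg.finalize();
        bool v = cfg.allow_new_shape_infer;  // cheap read, no string-map lookup
        return v ? 0 : 1;
    }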
- auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.m_partial_build_program; #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -655,7 +655,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.m_queue_type == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); } @@ -777,7 +777,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.m_enable_memory_pool) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1388,7 +1388,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + if (!_config.m_dump_graphs.value.empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1416,7 +1416,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.m_force_implementations); // first pass to set layout optimization_attributes for topology @@ -1640,15 +1640,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().m_queue_type == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().m_use_onednn) { lo.enable_onednn_for(); } } @@ -1856,8 +1856,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.m_weights_path; + if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 4a2f43b28d9360..fffdabd68b5779 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ 
b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + auto path = config.m_dump_graphs.value; if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 93698432e73be0..8041a91656117b 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index e5e33f4ad87b14..f5468fde71b557 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index ee8850fbd46220..244099c7736e2f 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index ba0cea2e32299e..510b6be55bbdb1 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
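The update_output_memory() hunks in reorder/reshape/scatter_* above (and broadcast/crop/gather earlier) are all the same one-line migration. Reduced to a sketch so the guard being changed stands out; everything except the config read is simplified and the types are invented:

    struct PoolSketch {
        void release_memory(int /*mem_id*/) { /* hand the block back */ }
    };

    struct NodeSketch {
        int output_mem = 42;             // pooled allocation id; 0 means none
        bool enable_memory_pool = true;  // now a direct config member read

        void update_output_memory(PoolSketch& pool, int input_mem) {
            // An optimized-out node aliases its input, so its old pooled
            // output must be released explicitly before rebinding.
            if (output_mem != 0 && enable_memory_pool)
                pool.release_memory(output_mem);
            output_mem = input_mem;
        }
    };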
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 8d10f9ad2b4fd7..78bf350331093e 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 9e0902e1f2ad4e..159398ecc494a3 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 47248cd2a4d773..f8e943e380033a 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
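For the select.cpp hunk above: with allow_new_shape_infer enabled the broadcast rules are validated by ngraph shape inference, so the hand-written check is only kept for the legacy path. Roughly what the AutoBroadcastType::NONE case demands, with shapes shown as plain vectors (helper invented for illustration):

    #include <vector>

    bool legacy_select_shapes_ok(const std::vector<int>& mask,
                                 const std::vector<int>& lhs,
                                 const std::vector<int>& rhs,
                                 bool broadcast_none) {
        if (broadcast_none)
            return mask == lhs && lhs == rhs;  // exact match; nothing reconciles later
        return true;  // numpy-style broadcasting is validated elsewhere
    }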
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 810353fe626c19..83153f31f976fa 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -21,20 +21,20 @@ namespace intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.m_exclusive_async_requests) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning)) { + } else if (config.m_enable_cpu_pinning) { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.m_num_streams.value, 1, ov::hint::SchedulingCoreType::PCORE_ONLY, true}); } else { return std::make_shared( - ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.m_num_streams.value}); } } } // namespace @@ -53,7 +53,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -148,7 +148,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -170,8 +170,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
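The comment above states the export policy; as a tiny decision helper it reads as follows (helper is mine, not the code's):

    #include <string>

    // OPTIMIZE_SIZE blobs are weightless: they can only be imported if the
    // original weights file is locatable again, so export is skipped when
    // no usable weights_path is known.
    bool should_export(bool optimize_size_cache, const std::string& weights_path) {
        if (!optimize_size_cache)
            return true;               // self-contained blob, always exportable
        return !weights_path.empty();  // stands in for validate_weights_path()
    }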
- ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.m_cache_mode; + std::string weights_path = m_config.m_weights_path; if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -179,7 +179,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -280,8 +280,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.m_num_streams.value; + if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index c3d74feffb5599..0485f1cc712b5a 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -87,13 +87,9 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context } } { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); + ib >> m_config.m_partial_build_program.value; + ib >> m_config.m_optimize_data.value; + ib >> m_config.m_allow_new_shape_infer.value; } auto imported_prog = std::make_shared(get_engine(), m_config); @@ -178,7 +174,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -210,7 +206,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.m_enable_profiling) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called 
before network execution @@ -522,9 +518,9 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { } } { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); + ob << m_config.m_partial_build_program.value; + ob << m_config.m_optimize_data.value; + ob << m_config.m_allow_new_shape_infer.value; } ob.set_stream(m_network->get_stream_ptr().get()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index c72dc9d11e00a8..bc298c5c816d71 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -23,12 +23,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ auto config = p.get_config(); { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = config.m_custom_outputs.value; if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.m_custom_outputs = std::vector({}); } } - config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index e1bc6055fd4349..81e57e148f0b93 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,8 +299,8 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr("runtime_options"); - return {}; + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); + } + return rt_info; } } // namespace @@ -174,22 +179,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. 
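Context for the set_cache_info() removal around this hunk: the same information now flows through get_rt_info() above, which folds the model's __weights_path hint into the runtime-options map that config.finalize() consumes. Sketched with the AnyMap flattened to strings; the "WEIGHTS_PATH" key is assumed to be what ov::weights_path.name() resolves to, as in the removed code:

    #include <map>
    #include <string>

    using FlatMap = std::map<std::string, std::string>;

    FlatMap collect_rt_info(const FlatMap& model_rt_info) {
        FlatMap rt_info;
        auto opts = model_rt_info.find("runtime_options");
        if (opts != model_rt_info.end())
            rt_info["runtime_options"] = opts->second;  // hints for finalize()
        auto wp = model_rt_info.find("__weights_path");
        if (wp != model_rt_info.end())
            rt_info["WEIGHTS_PATH"] = wp->second;       // weightless-cache hint
        return rt_info;
    }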
- if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -199,11 +188,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(context, get_rt_info(*model)); - set_cache_info(model, config); - auto transformed_model = clone_and_transform_model(model, config, context); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); @@ -221,7 +208,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.finalize(context_impl, get_rt_info(*model)); - set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); return std::make_shared(transformed_model, shared_from_this(), context_impl, config); @@ -251,7 +237,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_property(user_config); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -286,12 +272,12 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.m_query_model_ratio; auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -341,11 +327,11 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); + config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.m_cache_mode; + ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -362,9 +348,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == 
ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.m_weights_path; + if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -663,7 +648,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + uint32_t n_streams = static_cast(config.m_num_streams.value); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index a9bb813d0ce587..0234fbd5de4617 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -105,7 +105,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.m_config_file.value; CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); @@ -113,9 +113,9 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& // smaller # of kernels are built compared to static models. // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
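// Direct writes such as `m_config.m_max_kernels_per_batch = 4;` just below treat a
// ConfigOption member as if it were the raw value. A minimal sketch of the
// interface such assignments assume (assignment plus implicit read access;
// validation hooks omitted, so this is illustrative rather than the actual class):
template <typename T>
struct ConfigOption {
    T value{};
    ConfigOption& operator=(const T& v) { value = v; return *this; }  // enables m_opt = 4;
    operator const T&() const { return value; }                       // enables size_t n = m_opt;
};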
if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + m_config.m_max_kernels_per_batch = 4; } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + m_config.m_max_kernels_per_batch = 8; } m_program = build(ops, partial_build, is_inner_program); @@ -160,12 +160,12 @@ std::shared_ptr ProgramBuilder::build(const std::vectororigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +340,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.m_enable_profiling && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index f87f9af5275722..0133bcf7fcfaa6 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -114,8 +114,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio)) + , m_enable_profiling(m_graph->get_config().m_enable_profiling) , m_use_external_queue(m_graph->use_external_queue()) { GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { @@ -415,7 +415,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -888,7 +888,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. 
Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 44d68740a0dfb7..2ea04290c356e8 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -282,7 +282,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const auto& defaultPrecisions = ov::pass::low_precision::precision_set::get_int8_support(); const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.m_enable_loop_unrolling; { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -295,7 +295,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.m_enable_lp_transformations && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -328,7 +328,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - auto infer_precision = config.get_property(ov::hint::inference_precision); + auto infer_precision = config.m_inference_precision.value; if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -409,7 +409,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.m_enable_sdpa_optimization) return false; auto sdpa = std::dynamic_pointer_cast(node); @@ -946,7 +946,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(device_info.supports_immad); - manager.register_pass(config.get_property(ov::hint::activations_scale_factor)); + manager.register_pass(config.m_activations_scale_factor); if (!device_info.supports_immad) { manager.register_pass(); @@ -956,7 +956,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.m_kv_cache_precision; manager.register_pass(kv_cache_compression_dt); manager.register_pass(); @@ -981,7 +981,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; pass_config->set_callback([=](const_node_ptr& 
root) -> bool { if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: input type is not supported" << std::endl; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 5170bc48b955d7..c08b30484eb87e 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -7,6 +7,8 @@ #include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" namespace ov { @@ -40,29 +42,36 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); + + // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with + // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not + // using that mechanism. + if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); + } } void ExecutionConfig::finalize_impl(std::shared_ptr context) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + m_enable_lp_transformations = info.supports_imad || info.supports_immad; } if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); + m_use_onednn = true; } if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + m_queue_type = QueueTypes::in_order; } // Enable KV-cache compression by default for non-systolic platforms if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); + m_kv_cache_precision = ov::element::i8; } // Enable dynamic quantization by default for non-systolic platforms if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); + m_dynamic_quantization_group_size = 32; } } @@ -77,12 +86,12 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); + m_inference_precision = ov::element::undefined; } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); + m_inference_precision = ov::element::f16; else - set_property(ov::hint::inference_precision(ov::element::f32)); + m_inference_precision = ov::element::f32; } } } @@ -93,26 +102,26 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); + m_num_streams = 1; } else if (mode == 
ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); + m_num_streams = ov::streams::AUTO; } } } if (get_property(ov::num_streams) == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); + m_num_streams = n_streams; } if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); + m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + m_enable_kernels_reuse = false; } } } @@ -121,10 +130,19 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); + m_queue_priority = priority; } } } +const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { + static ov::PluginConfig::OptionsDesc help_map { + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION + }; + return help_map; +} + } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index df1cad281d636c..e1046a1828c342 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.get_property(ov::cache_dir); + std::string cache_dir = config.m_cache_dir; if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index e227c94c7dc06d..61844cd640ea41 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config)) + : stream(config.m_queue_type, stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_profiling(config.m_enable_profiling); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), 
priorty_extensions); + queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); + queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index d79a144c08ee86..aba6a0d8681758 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.get_property(ov::enable_profiling); - auto queue_type = config.get_property(ov::intel_gpu::queue_type); + auto profiling = config.m_enable_profiling; + auto queue_type = config.m_queue_type; return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index f59dc5c42cffc1..6bf44a31add0f4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, 
"fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; 
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); From 70cd3f579f7d166ad09b9500f9212cd9c638ac42 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 13:23:35 +0400 Subject: [PATCH 10/18] refactor Signed-off-by: Vladimir Paramuzov --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 3 --- src/inference/src/dev/plugin_config.cpp | 5 ++++- src/plugins/intel_gpu/src/graph/program.cpp | 5 ----- src/plugins/intel_gpu/src/plugin/program_builder.cpp | 1 + src/plugins/intel_gpu/src/runtime/execution_config.cpp | 7 +++++++ 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index a1bcab62b5d5fd..04e384cc26d35e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -197,7 +197,6 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} - template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -236,12 +235,10 @@ class OPENVINO_RUNTIME_API PluginConfig { // property variable name, string name, default value, description using OptionsDesc = std::vector>; - static OptionsDesc m_options_desc; virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } const std::string get_help_message(const std::string& name = "") const; void print_help() const; -private: bool m_is_finalized = false; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index e1b09b76ad8235..ca1c87cce1b659 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -98,6 +98,9 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector context, const ov::RTMap& rt_info) { + if (m_is_finalized) + return; + apply_rt_info(context, rt_info); apply_debug_options(context); // Copy internal properties before applying hints to ensure that @@ -122,8 +125,8 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) static std::vector allowed_visibility = { OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, -#ifdef ENABLE_DEBUG_CAPS OptionVisibility::DEBUG +#ifdef ENABLE_DEBUG_CAPS #endif }; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index bfb2092ceb8d33..6ec1b9156bf266 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -491,11 +491,6 @@ void 
program::set_options() { if (!_config.m_force_implementations.value.empty()) { _config.m_optimize_data = true; } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.m_dump_graphs = debug_config->dump_graphs; - } } void program::build_program(bool is_internal) { diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 0234fbd5de4617..ef28ba584b734d 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -10,6 +10,7 @@ #include "openvino/op/lstm_sequence.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" +#include "openvino/runtime/properties.hpp" #include "ov_ops/dynamic_quantize.hpp" #include "intel_gpu/plugin/common_utils.hpp" diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index c08b30484eb87e..80337c8fcd4a5f 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -22,6 +22,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -29,6 +30,7 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -52,6 +54,11 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con } void ExecutionConfig::finalize_impl(std::shared_ptr context) { + if (m_help) { + print_help(); + exit(-1); + } + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { From 5d7d6b8fb4b7868cc929f9681efe0d5a4db1e570 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 16:35:56 +0400 Subject: [PATCH 11/18] Hide config class members Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 16 ++++++ src/inference/src/dev/plugin_config.cpp | 14 ++++- .../intel_gpu/runtime/execution_config.hpp | 9 +++- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 6 ++- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 ++-- .../impls/onednn/primitive_onednn_base.h | 8 +-- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +-- 
.../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +-- src/plugins/intel_gpu/src/graph/program.cpp | 54 ++++++++----------- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_nd_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_update.cpp | 2 +- src/plugins/intel_gpu/src/graph/select.cpp | 2 +- .../intel_gpu/src/graph/strided_slice.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 22 ++++---- src/plugins/intel_gpu/src/plugin/graph.cpp | 16 ++---- .../intel_gpu/src/plugin/ops/condition.cpp | 11 ++-- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 5 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 12 ++--- .../intel_gpu/src/plugin/program_builder.cpp | 43 +++++++++------ .../src/plugin/sync_infer_request.cpp | 8 +-- .../src/plugin/transformations_pipeline.cpp | 14 ++--- .../src/runtime/execution_config.cpp | 13 ++++- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_stream.cpp | 8 +-- src/plugins/intel_gpu/src/runtime/stream.cpp | 4 +- 45 files changed, 189 insertions(+), 152 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 04e384cc26d35e..9e566b216590cb 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -6,6 +6,7 @@ #include #include +#include "openvino/core/attribute_visitor.hpp" #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" @@ -47,6 +48,19 @@ #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; +#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \ + const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ + if (m_is_finalized) { \ + return m_ ## PropertyVar.value; \ + } else { \ + if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ + return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ + } else { \ + return m_ ## PropertyVar.value; \ + } \ + } \ + } + #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; @@ -192,6 +206,8 @@ class OPENVINO_RUNTIME_API PluginConfig { void finalize(std::shared_ptr context, const ov::RTMap& rt_info); + bool visit_attributes(ov::AttributeVisitor& visitor) const; + protected: virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} virtual void apply_debug_options(std::shared_ptr context); diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index ca1c87cce1b659..b21547f40a57df 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -67,7 +67,7 @@ ov::Any PluginConfig::get_property(const std::string& name, const std::vector allowed_visibility = {OptionVisibility::RELEASE}; + const static std::vector allowed_visibility = {OptionVisibility::RELEASE,OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG}; const bool throw_on_error = true; set_property(config, allowed_visibility, throw_on_error); } @@ -121,6 +121,18 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R m_is_finalized = true; } +bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const { + // for (const auto& prop : m_user_properties) { + // visitor.on_attribute(prop.first + "__user", prop.second.as()); + // } + // for (const auto& prop : m_options_map) { + // visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as()); + // } + // visitor.on_attribute("is_finalized", m_is_finalized); + + return true; +} + void PluginConfig::apply_debug_options(std::shared_ptr context) { static std::vector allowed_visibility = { OptionVisibility::RELEASE, diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index e7246662b06500..0ca7f616f8790b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -22,7 +22,10 @@ struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(const ExecutionConfig& other); ExecutionConfig& operator=(const ExecutionConfig& other); - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + void finalize(cldnn::engine& engine); + using ov::PluginConfig::finalize; + + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION @@ -36,6 +39,10 @@ struct ExecutionConfig : public ov::PluginConfig { void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); + + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 38b0795ceddcc3..fb1ef48df4b82c 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
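// The release-then-alias sequence below recurs in every update_output_memory()
// touched by this patch (broadcast, crop, gather, non_max_suppression, permute,
// reorder, reshape). Condensed form, with the cast's template argument written
// out explicitly:
if (static_cast<bool>(_outputs[0]) &&
    _node->get_program().get_config().get_enable_memory_pool()) {
    // return the obsolete pooled allocation before aliasing the input buffer
    _network.get_memory_pool().release_memory(_outputs[0].get(),
                                              _node->get_unique_id(), _node->id(), _network.get_id());
}
_outputs[0] = input_memory_ptr();  // broadcast/gather variant; others use reinterpret_buffer() instead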
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 7d373d1e6c3a92..7f091b7b7a8a28 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 7a3b7b6b5b93ec..83d9dbb260e40e 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index dc6ce73e8c585d..40478cfe017b23 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,7 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 549850560e903d..2e58e49ad3f207 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 3e0e8aa2f61bf5..3bb6118a4fa565 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().m_optimize_data; + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 999e103c3fe200..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().m_partial_build_program) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index 692f767926520c..7e562582fdcc74 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().m_custom_outputs.value; + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 3efc2ba341596a..2c361c6335069c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -143,8 +144,9 @@ propagate_constants::calculate(engine& engine, return {}; 
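// With the option members hidden, the constant-folding subgraph can no longer
// tweak the cloned config by writing fields directly: overrides go through
// set_property() and the clone is re-finalized against the engine, using the
// finalize(engine) convenience overload introduced by this series. The pattern
// in isolation:
//   ExecutionConfig cf_config = config;
//   cf_config.set_property(ov::intel_gpu::optimize_data(false));
//   cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs));
//   cf_config.finalize(engine);  // resolve hints/defaults before build_network()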
ExecutionConfig cf_config = config; - cf_config.m_optimize_data = false; - cf_config.m_custom_outputs = const_outputs; + cf_config.set_property(ov::intel_gpu::optimize_data(false)); + cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index ac7714d1b60542..107a943ada7724 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 915a0ce6167c49..d77ac2098e16d8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 1c47853dac82d5..c5628d70a0450d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1164,13 +1164,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.m_force_implementations.value; + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 18ea23e5843223..423e879936cf0d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.m_cache_dir.value; + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool 
kernels_cache::is_cache_enabled() const { - if (!_config.m_allow_new_shape_infer && - (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.m_cache_dir.value.empty(); + return !_config.get_cache_dir().empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.m_max_kernels_per_batch; + return _config.get_max_kernels_per_batch(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 01545a0305afdd..9a463f6f98291e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.m_cache_dir.value; + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.m_allow_new_shape_infer) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index b135d9af73f31f..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index 4f6f7dc12868c1..535ac540c1ffff 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -60,7 +60,7 @@ const std::vector>& 
Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().m_num_streams.value); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index a2cacf9724f33d..bb9271e761bef0 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 7547fa5952b800..0ed7ce4d2dadd8 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().m_enable_profiling) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) { if (!_internal) { net_id = get_unique_net_id(); } @@ -364,7 +364,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.m_num_streams.value; + required_mem_size += weights_const_size * _config.get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 94a85cfbace47a..f3788e34362604 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
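// Accessors such as get_num_streams() above are not hand-written; they are
// generated by OV_CONFIG_DECLARE_GETTERS. Expansion for one option
// (ov::enable_profiling, simplified): a user-supplied value shadows the internal
// one until finalize() merges them, after which the internal value is
// authoritative.
const bool& get_enable_profiling() const {
    if (m_is_finalized)
        return m_enable_profiling.value;
    auto it = m_user_properties.find(ov::enable_profiling.name());
    if (it != m_user_properties.end())
        return it->second.as<bool>();  // ov::Any::as<T>() returns a reference to the stored value
    return m_enable_profiling.value;
}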
if (static_cast(_outputs[i]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index c01cb15e7bba62..00a0b8e2a2881c 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0016b026558778..796442fe5d7ad5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().m_enable_memory_pool) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2029,7 +2029,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? 
node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2577,8 +2577,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().m_enable_profiling), - ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 6ec1b9156bf266..12db38bd067599 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.m_compilation_num_threads.value; - auto priority = config.m_host_task_priority; + int streams = (num_streams > 0) ? num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.m_enable_cpu_pinning; + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -162,8 +162,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -199,8 +198,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -213,9 +211,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); - new_shape_infer = _config.m_allow_new_shape_infer; + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = cldnn::make_unique(); } @@ -227,14 +224,14 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.m_allow_new_shape_infer; + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, 
prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -488,15 +485,11 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.m_force_implementations.value.empty()) { - _config.m_optimize_data = true; - } } void program::build_program(bool is_internal) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } @@ -526,9 +519,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.m_use_onednn = true; - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -546,7 +536,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -623,7 +613,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -631,7 +621,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? - auto partial_build = _config.m_partial_build_program; + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -650,7 +640,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.m_queue_type == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); } @@ -772,7 +762,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.m_enable_memory_pool) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1383,7 +1373,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.m_dump_graphs.value.empty()) + if (!_config.get_dump_graphs().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1411,7 +1401,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.m_force_implementations); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1635,15 +1625,15 @@ 
void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().m_queue_type == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().m_use_onednn) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1851,8 +1841,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.m_weights_path; - if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index fffdabd68b5779..0989e82cc0ff47 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.m_dump_graphs.value; + auto path = config.get_dump_graphs(); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 8041a91656117b..bfdc287852ae05 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index f5468fde71b557..bc1921127efd1f 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
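// [Editorial sketch, not part of the patch] Every update_output_memory() override touched in
// this series (arg_max_min, permute, reorder, reshape, scatter_*, strided_slice) repeats the
// same two steps. A hypothetical consolidated helper, assuming the cldnn types used in these
// hunks are in scope; the helper itself does not exist in the tree:
inline void release_and_alias_output(network& net, const program_node& node,
                                     memory::ptr& output, const memory& input, const layout& out_layout) {
    // Optimized-out nodes also allocate from the memory pool, so the legacy output buffer
    // must be returned to the pool explicitly before the output starts aliasing the input.
    if (output && node.get_program().get_config().get_enable_memory_pool()) {
        net.get_memory_pool().release_memory(output.get(), node.get_unique_id(), node.id(), net.get_id());
    }
    // Reuse the input allocation in-place, reinterpreted with the output layout.
    output = net.get_engine().reinterpret_buffer(input, out_layout);
}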
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 244099c7736e2f..2af34fe0245443 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 510b6be55bbdb1..0ea852614b291c 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 78bf350331093e..ee61478dc3c8af 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 159398ecc494a3..9a6b845a874d10 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index f8e943e380033a..ae962ff7e1e369 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 83153f31f976fa..279035f27d776f 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -21,20 +21,20 @@ namespace intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.m_exclusive_async_requests) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.m_enable_cpu_pinning) { + } else if (config.get_enable_cpu_pinning()) { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.m_num_streams.value, + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, true}); } else { return std::make_shared( - ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.m_num_streams.value}); + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_num_streams()}); } } } // namespace @@ -53,7 +53,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 
? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -148,7 +148,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -170,8 +170,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. - ov::CacheMode cache_mode = m_config.m_cache_mode; - std::string weights_path = m_config.m_weights_path; + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -179,7 +179,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -280,8 +280,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.m_num_streams.value; - if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 0485f1cc712b5a..2c595b419fbaa4 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -86,11 +86,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - ib >> m_config.m_partial_build_program.value; - ib >> m_config.m_optimize_data.value; - ib >> m_config.m_allow_new_shape_infer.value; - } + // ib >> m_config; auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -174,7 +170,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -206,7 +202,7 @@ bool Graph::use_external_queue() const { std::shared_ptr 
Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.m_enable_profiling) { + if (m_config.get_enable_profiling()) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -517,11 +513,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.m_partial_build_program.value; - ob << m_config.m_optimize_data.value; - ob << m_config.m_allow_new_shape_infer.value; - } + // ob << m_config; ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index bc298c5c816d71..617b92cb0d7ebe 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -22,13 +23,9 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << ", num inputs: " << op->get_input_size() << std::endl; auto config = p.get_config(); - { - auto custom_outputs = config.m_custom_outputs.value; - if (!custom_outputs.empty()) { - config.m_custom_outputs = std::vector({}); - } - } - config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 81e57e148f0b93..c9804f9e75f84f 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,8 +299,9 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.m_query_model_ratio; + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -330,8 +330,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.m_cache_mode; - ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -348,8 +348,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.m_weights_path; - if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && 
!ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -648,7 +648,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.m_num_streams.value); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index ef28ba584b734d..5f92cd8c306cb4 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" @@ -106,19 +107,10 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.m_config_file.value; + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.m_max_kernels_per_batch = 4; - } else { - m_config.m_max_kernels_per_batch = 8; - } - m_program = build(ops, partial_build, is_inner_program); } @@ -159,14 +151,33 @@ std::shared_ptr ProgramBuilder::build(const std::vectoris_dynamic()) { + is_dynamic = true; + break; + } + } if (is_inner_program) { - allow_new_shape_infer = (m_config.m_allow_new_shape_infer || allow_new_shape_infer); + allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); + } + + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
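// [Editorial aside, not part of the patch] Rough arithmetic behind the batch-size choice below:
// with N kernels to build and a batch size B, compilation yields ceil(N / B) batches that the
// task executor can process in parallel. Dynamic models map most layers onto shared
// shape-agnostic kernels, so N is small and B = 8 can leave executor threads idle, while B = 4
// doubles the number of schedulable batches. Illustrative numbers, not measurements:
//   N = 24, B = 8  ->  3 batches
//   N = 24, B = 4  ->  6 batches (better thread utilization, slightly more per-batch overhead)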
+    if (is_dynamic) {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));
+    } else {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));
     }
 
-    m_config.m_partial_build_program = partial_build;
-    m_config.m_optimize_data = true;
-    m_config.m_allow_new_shape_infer = allow_new_shape_infer;
+    m_config.set_property(ov::intel_gpu::partial_build_program(partial_build));
+    m_config.set_property(ov::intel_gpu::optimize_data(true));
+    m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
+    //if (has_lstm)
+    m_config.set_property(ov::intel_gpu::use_onednn(true));
+    m_config.finalize(m_engine);
 
     prepare_build();
     {
@@ -310,7 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim,
     prim->origin_op_name = op.get_friendly_name();
     prim->origin_op_type_name = op.get_type_name();
 
-    if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) {
+    if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) {
         if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
             auto rt_info = op.get_rt_info();
@@ -341,7 +352,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim,
         prim->origin_op_type_name = prim->type_string();
     }
 
-    if (this->m_config.m_enable_profiling && should_profile) {
+    if (this->m_config.get_enable_profiling() && should_profile) {
         profiling_ids.push_back(prim_id);
         init_profile_info(*prim);
     }
diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index 0133bcf7fcfaa6..69e4a041e77f65 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -114,8 +114,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c
     : ov::ISyncInferRequest(compiled_model)
     , m_graph(compiled_model->get_graph(0))
     , m_context(std::static_pointer_cast<RemoteContextImpl>(compiled_model->get_context_impl()))
-    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio))
-    , m_enable_profiling(m_graph->get_config().m_enable_profiling)
+    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio()))
+    , m_enable_profiling(m_graph->get_config().get_enable_profiling())
     , m_use_external_queue(m_graph->use_external_queue()) {
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
@@ -415,7 +415,7 @@ void SyncInferRequest::wait() {
         auto mem_shape = output_layout.get_shape();
         // In case of old shape infer we need to shrink out tensor shape to avoid redundant dimensions that occur due to rank extension
         // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases
-        if (!m_graph->get_config().m_allow_new_shape_infer) {
+        if (!m_graph->get_config().get_allow_new_shape_infer()) {
             OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference");
             OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor");
             mem_shape = port.get_shape();
@@ -888,7 +888,7 @@ std::vector SyncInferRequest::prepare_input(const std::string
     auto memory = device_tensor->get_memory();
     // WA to extend shape to ranks expected by legacy shape infer.
Remove after full migration to new shape infer - if (!m_graph->get_config().m_allow_new_shape_infer) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 2ea04290c356e8..67f87fabd6a5dc 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -282,7 +282,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const auto& defaultPrecisions = ov::pass::low_precision::precision_set::get_int8_support(); const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; - bool unroll_loop = config.m_enable_loop_unrolling; + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -295,7 +295,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.m_enable_lp_transformations && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -328,7 +328,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - auto infer_precision = config.m_inference_precision.value; + auto infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -409,7 +409,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.m_enable_sdpa_optimization) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = std::dynamic_pointer_cast(node); @@ -946,7 +946,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(device_info.supports_immad); - manager.register_pass(config.m_activations_scale_factor); + manager.register_pass(config.get_activations_scale_factor()); if (!device_info.supports_immad) { manager.register_pass(); @@ -956,7 +956,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.m_kv_cache_precision; + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt); manager.register_pass(); @@ -981,7 +981,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned 
off: input type is not supported" << std::endl; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 80337c8fcd4a5f..79b8ab9d564a9e 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -22,7 +22,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -30,13 +30,18 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } return *this; } +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx, {}); +} + void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { @@ -80,6 +85,10 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } + + if (!get_force_implementations().empty()) { + m_optimize_data = true; + } } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index e1046a1828c342..6eb5855c29f9b5 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.m_cache_dir; + std::string cache_dir = config.get_cache_dir(); if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index 61844cd640ea41..bc01a8174292e4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.m_queue_type, stream::get_expected_sync_method(config)) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.m_enable_profiling); + queue_builder.set_profiling(config.get_enable_profiling()); 
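// [Editorial sketch, not part of the patch] The copy constructor and operator= added in
// execution_config.cpp above deliberately leave a copy un-finalized, so per-model or
// per-stream copies can still be adjusted and must then be re-finalized. Intended lifecycle,
// using only calls shown in this series:
ExecutionConfig make_finalized_copy(const ExecutionConfig& base, cldnn::engine& engine) {
    ExecutionConfig cfg = base;                    // options and user properties are copied
    cfg.set_property(ov::enable_profiling(true));  // still allowed: the copy is not finalized
    cfg.finalize(engine);                          // builds a context from the engine and runs PluginConfig::finalize()
    return cfg;                                    // further set_property() calls would now assert
}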
queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); + queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); + queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index aba6a0d8681758..d8a3e559db2c93 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.m_enable_profiling; - auto queue_type = config.m_queue_type; + auto profiling = config.get_enable_profiling(); + auto queue_type = config.get_queue_type(); return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } From 0e93db364d00fa5b01d0bd34823e8ef4d7641dae Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 09:38:23 +0400 Subject: [PATCH 12/18] Options visibility update Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 44 +++++++++++-------- src/inference/src/dev/plugin_config.cpp | 30 +++---------- src/inference/tests/unit/config_test.cpp | 25 +++++------ src/plugins/intel_gpu/src/plugin/plugin.cpp | 11 ++--- .../src/runtime/execution_config.cpp | 18 ++++---- .../unit/fusions/convolution_fusion_test.cpp | 2 +- .../fusions/fully_connected_fusion_test.cpp | 8 ++-- .../graph_manipulation_gpu_test.cpp | 2 +- .../passes/prepare_primitive_fusing_test.cpp | 2 +- .../remove_redundant_reorders_tests.cpp | 12 ++--- .../test_cases/concatenation_gpu_test.cpp | 6 +-- .../unit/test_cases/reorder_gpu_test.cpp | 2 +- .../tests/unit/test_utils/test_utils.cpp | 6 +-- 13 files changed, 79 insertions(+), 89 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 9e566b216590cb..36785d071bfc8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -78,12 +78,32 @@ namespace ov { -enum class OptionVisibility { - RELEASE = 0, // Option can be set for any build type via public interface, environment and config file - RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only - DEBUG = 2, // Option can be set for debug builds only via environment and config file +enum class OptionVisibility : uint8_t { + RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1 << 1, // Option can 
be set for any build type via environment and config file only
+    DEBUG = 1 << 2,            // Option can be set for debug builds only via environment and config file
+#ifdef ENABLE_DEBUG_CAPS
+    ANY = 0x07,                // Any visibility is valid including DEBUG
+#else
+    ANY = 0x03,                // Any visibility is valid excluding DEBUG
+#endif
 };
 
+inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator~(OptionVisibility a) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(~static_cast<underlying_type>(a));
+}
+
 inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) {
     switch (visibility) {
     case OptionVisibility::RELEASE: os << "RELEASE"; break;
@@ -185,23 +205,14 @@ class OPENVINO_RUNTIME_API PluginConfig {
     PluginConfig(PluginConfig&& other) = delete;
     PluginConfig& operator=(PluginConfig&& other) = delete;
 
-    void set_property(const ov::AnyMap& properties);
-    Any get_property(const std::string& name) const;
+    void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true);
+    Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const;
 
     template <typename... Properties>
     util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
         set_property(ov::AnyMap{std::forward<Properties>(properties)...});
     }
 
-    template <typename T, PropertyMutability mutability>
-    T get_property(const ov::Property<T, mutability>& property) const {
-        if (is_set_by_user(property)) {
-            return m_user_properties.at(property.name()).template as<T>();
-        }
-        OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name());
-        return static_cast<ConfigOption<T>*>(m_options_map.at(property.name()))->value;
-    }
-
     std::string to_string() const;
 
     void finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info);
@@ -236,9 +247,6 @@ class OPENVINO_RUNTIME_API PluginConfig {
         }
     }
 
-    ov::Any get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const;
-    void set_property(const ov::AnyMap& properties, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error);
-
     ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
     ov::AnyMap read_env(const std::vector<std::string>& prefixes) const;
     void cleanup_unsupported(ov::AnyMap& config) const;
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index b21547f40a57df..bbfe88d8737f80 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -48,31 +48,21 @@ size_t get_terminal_width() {
 
 namespace ov {
 
-ov::Any PluginConfig::get_property(const std::string& name) const {
-    const static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL};
-    return get_property(name, allowed_visibility);
-}
-
-ov::Any PluginConfig::get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const {
+ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const {
     if (m_user_properties.find(name) != m_user_properties.end()) {
         return m_user_properties.at(name);
     }
 
     auto option = get_option_ptr(name);
-    if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+    if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
         OPENVINO_THROW("Couldn't get unknown property: ", name);
     }
 
     return option->get_any();
 }
 
-void PluginConfig::set_property(const AnyMap& config) {
-    const static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG};
-    const bool throw_on_error = true;
-    set_property(config, allowed_visibility, throw_on_error);
-}
-
-void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error) {
+void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
 
     for (auto& kv : config) {
@@ -80,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error) {
         auto& val = kv.second;
 
         auto option = get_option_ptr(name);
-        if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+        if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
             if (throw_on_error)
                 OPENVINO_THROW("Couldn't set unknown property: ", name);
             else
@@ -134,25 +124,17 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
 }
 
 void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
-    static std::vector<OptionVisibility> allowed_visibility = {
-        OptionVisibility::RELEASE,
-        OptionVisibility::RELEASE_INTERNAL,
-        OptionVisibility::DEBUG
-#ifdef ENABLE_DEBUG_CAPS
-#endif
-    };
-
     const bool throw_on_error = false;
 
     if (context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
-        set_property(config_properties, allowed_visibility, throw_on_error);
+        set_property(config_properties, OptionVisibility::ANY, throw_on_error);
     }
 
     ov::AnyMap env_properties = read_env({"OV_"});
     cleanup_unsupported(env_properties);
-    set_property(env_properties, allowed_visibility, throw_on_error);
+    set_property(env_properties, OptionVisibility::ANY, throw_on_error);
 }
 
 ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 42b7fba115a273..d2c99585ab015b 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -40,7 +40,6 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
     OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
     OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
     #undef OV_CONFIG_OPTION
-
     }
 
     NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() {
         for (const auto& kv : other.m_options_map) {
             m_options_map.at(kv.first)->set_any(kv.second->get_any());
         }
     }
 
-    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
     OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
     OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
     OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
@@ -97,26 +96,26 @@ TEST(plugin_config, can_create_not_empty_config) {
 
 TEST(plugin_config, can_set_get_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_NO_THROW(cfg.get_property(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true);
+    ASSERT_NO_THROW(cfg.get_bool_property());
+    ASSERT_EQ(cfg.get_bool_property(), true);
     ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
-    ASSERT_EQ(cfg.get_property(bool_property), false);
+    ASSERT_EQ(cfg.get_bool_property(), false);
 }
 
 TEST(plugin_config, throw_for_unsupported_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_ANY_THROW(cfg.get_property(unsupported_property));
+    ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name()));
     ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f)));
 }
 
 TEST(plugin_config, can_direct_access_to_properties) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property));
+    ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property());
     ASSERT_NO_THROW(cfg.set_property(int_property(1)));
     ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called
 
     cfg.m_int_property.value = 2;
-    ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was set previously
+    ASSERT_EQ(cfg.get_int_property(), 1); // still 1 as user property was set previously
 }
 
 TEST(plugin_config, finalization_updates_member) {
@@ -132,19 +131,19 @@ TEST(plugin_config, finalization_updates_member) {
 
 TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_property(bool_property), true); // default value
+    ASSERT_EQ(cfg.get_bool_property(), true); // default value
     ASSERT_EQ(cfg.m_bool_property.value, true); // default value
 
     cfg.m_bool_property.value = false; // update member directly
-    ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set
+    ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set
 
     ASSERT_NO_THROW(cfg.set_property(bool_property(true)));
     ASSERT_TRUE(cfg.is_set_by_user(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned
+    ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned
     ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated
 
     cfg.finalize(nullptr, {});
-    ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization
+    ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value); // equal after finalization
     ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared
 }
 
@@ -194,7 +193,7 @@ TEST(plugin_config, can_copy_config) {
     ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
     ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
     ASSERT_EQ(cfg2.m_int_property.value, 1);
-    ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too
+    ASSERT_EQ(cfg2.get_bool_property(), false); // ensure user properties are copied too
 
     // check that cfg1 modification doesn't impact a copy
     cfg1.set_property(high_level_property("value3"));
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp
b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 8b9899989f0017..a1fb2499c1358d 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -34,6 +34,7 @@ #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/performance_heuristics.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/weights_path.hpp" @@ -188,7 +189,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); @@ -237,7 +238,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_property(user_config); + config.set_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -272,7 +273,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -327,7 +328,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(_orig_config); + config.set_property(_orig_config, OptionVisibility::RELEASE); config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_cache_mode(); @@ -435,7 +436,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 79b8ab9d564a9e..b4921fb8e16fb3 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -53,7 +53,7 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. 
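// [Editorial aside, not part of the patch] How the OptionVisibility::RELEASE filter passed in
// the plugin.cpp hunks above interacts with the bitmask check in PluginConfig::set_property /
// get_property (using the '!=' comparison that patch 13 below applies):
//   allowed = RELEASE (0b001)
//     option RELEASE          : (0b001 & 0b001) == 0b001 -> accepted
//     option RELEASE_INTERNAL : (0b001 & 0b010) == 0b000 -> rejected ("unknown property")
//     option DEBUG            : (0b001 & 0b100) == 0b000 -> rejected
//   allowed = ANY (0x07 with ENABLE_DEBUG_CAPS, 0x03 otherwise) admits every option the build
//   supports, which is what internal paths such as apply_debug_options() rely on.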
- if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -72,7 +72,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (info.supports_immad) { m_use_onednn = true; } - if (get_property(ov::intel_gpu::use_onednn)) { + if (get_use_onednn()) { m_queue_type = QueueTypes::in_order; } @@ -99,7 +99,7 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); + const auto mode = get_execution_mode(); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { m_inference_precision = ov::element::undefined; @@ -115,7 +115,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); + const auto mode = get_performance_mode(); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { m_num_streams = 1; @@ -125,18 +125,18 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { } } - if (get_property(ov::num_streams) == ov::streams::AUTO) { + if (get_num_streams() == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); m_num_streams = n_streams; } - if (get_property(ov::internal::exclusive_async_requests)) { + if (get_exclusive_async_requests()) { m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { m_enable_kernels_reuse = false; } } @@ -144,7 +144,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); + const auto priority = get_model_priority(); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { m_queue_priority = priority; } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index 235853eaf79f60..528ed566524b4f 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTestget_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations)); + prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations()); program_wrapper::apply_opt_pass(*prog); ASSERT_TRUE(!has_node(*prog, "permute")); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1fbeab7e67ac2d..b46033f15d77db 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -136,7 +136,7 @@ 
TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad auto prog = program::build_program(engine, topology, config, false, true); config.set_property(ov::intel_gpu::optimize_data(true)); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) { auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); - bool opt_data = config.get_property(ov::intel_gpu::optimize_data); + bool opt_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog); program_wrapper::apply_opt_pass(*prog, opt_data); @@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index f640b02afa99cb..8cb561f4232a6a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1422,7 +1422,7 @@ struct concat_gpu_4d_implicit : public concat_gpu { } auto outputs = concat_network->execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1642,7 +1642,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = 
std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1805,7 +1805,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer. diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 8ade3b6c8e0f31..1fac766cd572ae 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order) + if (config.get_queue_type() != QueueTypes::out_of_order) GTEST_SKIP(); ASSERT_TRUE(executed_primitives.count("in") == 1); diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index 474182801dbfc2..ba129090c99ce9 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) { } } std::string input_name = "input" + std::to_string(i); - if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) && generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. input_name = "input0_init"; } // First input is provided to the network as input_layout. // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) || !generic_params->network_config.get_optimize_data()) { topology.add(input_layout(input_name, input_mems[i]->get_layout())); input_layouts_names.push_back(input_name); } else { @@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) { } } - if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if (generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. 
topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout())); } From 456fd32c68fa4a1ae956920e588979ff855b86a2 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 10:33:25 +0400 Subject: [PATCH 13/18] Fixes and visit_attributes method impl Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 2 +- src/inference/src/dev/plugin_config.cpp | 38 ++++++++++--------- .../include/intel_gpu/runtime/options.inl | 2 +- src/plugins/intel_gpu/src/plugin/graph.cpp | 33 +++++++++++++++- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 36785d071bfc8e..2eacd157ec8b94 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -217,7 +217,7 @@ class OPENVINO_RUNTIME_API PluginConfig { void finalize(std::shared_ptr context, const ov::RTMap& rt_info); - bool visit_attributes(ov::AttributeVisitor& visitor) const; + bool visit_attributes(ov::AttributeVisitor& visitor); protected: virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index bbfe88d8737f80..bebcd891251616 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -55,7 +55,7 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all } auto option = get_option_ptr(name); - if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) { + if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) { OPENVINO_THROW("Couldn't get unknown property: ", name); } @@ -70,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allow auto& val = kv.second; auto option = get_option_ptr(name); - if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) { + if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) { if (throw_on_error) OPENVINO_THROW("Couldn't set unknown property: ", name); else @@ -111,14 +111,16 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R m_is_finalized = true; } -bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const { - // for (const auto& prop : m_user_properties) { - // visitor.on_attribute(prop.first + "__user", prop.second.as()); - // } - // for (const auto& prop : m_options_map) { - // visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as()); - // } - // visitor.on_attribute("is_finalized", m_is_finalized); +bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { + for (const auto& prop : m_user_properties) { + auto val = prop.second.as(); + visitor.on_attribute(prop.first + "__user", val); + } + for (const auto& prop : m_options_map) { + auto val = prop.second->get_any().as(); + visitor.on_attribute(prop.first + "__internal", val); + } + visitor.on_attribute("is_finalized", m_is_finalized); return true; } @@ -212,20 +214,20 @@ void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { } std::string PluginConfig::to_string() const { - std::stringstream s; + std::stringstream ss; - s << "-----------------------------------------\n"; - s << "PROPERTIES:\n"; + ss << "-----------------------------------------\n"; + ss << "PROPERTIES:\n"; for (const auto& option : 
m_options_map) { - s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; + ss << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } - s << "USER PROPERTIES:\n"; + ss << "USER PROPERTIES:\n"; for (const auto& user_prop : m_user_properties) { - s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; + ss << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; } - return s.str(); + return ss.str(); } void PluginConfig::print_help() const { @@ -276,7 +278,7 @@ void PluginConfig::print_help() const { const size_t max_name_width = static_cast(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size()); const size_t terminal_width = get_terminal_width(); - ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n"; + ss << std::left << std::setw(max_name_width) << "Option name" << " | " << " Description " << "\n"; ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n"; for (auto& kv : options_desc) { ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n"; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 1941aaec69b2bf..1e4f7076887a3e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -45,6 +45,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, " OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 
0 - Disabled, 4 - Maximum verbosity") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") @@ -58,7 +59,6 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/ou OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measre and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asyncrhonously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop") diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 2c595b419fbaa4..1d0c4f86eb8b48 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/pass/serialize.hpp" @@ -34,6 +36,34 @@ namespace ov { namespace intel_gpu { +namespace { + + +class OstreamAttributeVisitor : public ov::AttributeVisitor { + cldnn::BinaryOutputBuffer& os; + + template + void append_attribute(const std::string& name, const T& value) { + os << name; + os << value; + } +public: + OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + append_attribute(name, adapter.get()); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + append_attribute(name, adapter.get()); + } +}; + +} // namespace Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) @@ -513,7 +543,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - // ob << m_config; + OstreamAttributeVisitor visitor(ob); + m_config.visit_attributes(visitor); ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); From 2a49e23892c0ad540d959abe9a73e662b9ccc409 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 11:28:55 +0400 Subject: [PATCH 14/18] Refactor debug knobs Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/internal_properties.hpp | 56 ++++++++++++++++--- 
.../include/intel_gpu/runtime/options.inl | 18 +++--- .../intel_gpu/src/graph/layout_optimizer.cpp | 5 -- src/plugins/intel_gpu/src/graph/program.cpp | 3 +- .../src/graph/program_dump_graph.cpp | 2 +- 5 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 10e58acee25cf8..aa6afc00b6ef70 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -36,6 +36,40 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -56,15 +90,19 @@ static constexpr Property use_onednn{"USE_ONEDNN"} static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; -static constexpr Property log_to_file{"LOG_TO_FILE"}; -static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; -static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; +static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; @@ -76,7 +114,7 @@ static constexpr Property disable_fake_alignme static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property 
mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 1e4f7076887a3e..93b4653034ab92 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -52,12 +52,16 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to spec OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. 
Supported values: binary, text, text_raw") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set{}, "Space separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measre and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asyncrhonously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation") @@ -68,4 +72,4 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fa OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers to load raw binary") diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index bb9271e761bef0..7fab84b8a6c527 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -439,11 +439,6 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, if (disable_winograd_conv) return false; - // cases when NOT to use winograd - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1) - return false; - auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || (input_layout.is_static() && input_layout.feature() % 64 != 0) // current algorithm is effective for ifm to be multiply of 64 diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 12db38bd067599..98979c9edb3211 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1372,8 +1372,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_dump_graphs().empty()) + if (!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 0989e82cc0ff47..8838cd361502cd 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void 
dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_dump_graphs(); + auto path = config.get_dump_graphs_path(); if (path.empty()) { return {}; } From 3ca210ac8841c69c1884f1e189d1496e3fa24ed0 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:12:36 +0400 Subject: [PATCH 15/18] split set_prop and set_user_prop again Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 5 +- src/inference/src/dev/plugin_config.cpp | 27 +++- .../include/intel_gpu/graph/program.hpp | 1 - .../intel_gpu/plugin/program_builder.hpp | 10 +- .../intel_gpu/runtime/internal_properties.hpp | 1 + .../include/intel_gpu/runtime/options.inl | 3 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 7 +- .../prepare_primitive_fusing.cpp | 8 +- .../prepare_primitive_fusing_through.cpp | 3 + .../graph/graph_optimizer/reorder_inputs.cpp | 2 +- .../src/graph/impls/ocl/kernels_cache.cpp | 22 +--- .../src/graph/impls/ocl/kernels_cache.hpp | 1 - .../impls/onednn/concatenation_onednn.hpp | 3 +- .../graph/impls/onednn/convolution_onednn.hpp | 3 +- .../impls/onednn/deconvolution_onednn.hpp | 3 +- .../impls/onednn/fully_connected_onednn.hpp | 3 +- .../src/graph/impls/onednn/gemm_onednn.hpp | 3 +- .../graph/impls/onednn/lstm_seq_onednn.hpp | 4 +- .../src/graph/impls/onednn/pooling_onednn.hpp | 3 +- .../impls/onednn/primitive_onednn_base.h | 6 +- .../src/graph/impls/onednn/reduce_onednn.hpp | 3 +- .../src/graph/impls/onednn/reorder_onednn.hpp | 3 +- .../src/graph/include/primitive_inst.h | 2 + .../src/graph/include/program_node.h | 1 + src/plugins/intel_gpu/src/graph/network.cpp | 5 +- .../intel_gpu/src/graph/primitive_inst.cpp | 15 +-- src/plugins/intel_gpu/src/graph/program.cpp | 24 +--- .../intel_gpu/src/graph/program_node.cpp | 3 +- .../fully_connected_kernel_bf_tiled.cpp | 21 ---- src/plugins/intel_gpu/src/plugin/graph.cpp | 6 +- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 3 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 115 ++++++++++++++++-- .../intel_gpu/src/plugin/program_builder.cpp | 89 +------------- .../src/plugin/sync_infer_request.cpp | 7 +- .../dynamic_quantize_fully_connected.cpp | 4 +- .../dynamic_quantize_fully_connected.hpp | 2 +- .../src/plugin/transformations_pipeline.cpp | 17 ++- .../src/runtime/execution_config.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_device.cpp | 3 - 40 files changed, 213 insertions(+), 232 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 2eacd157ec8b94..08dd148b6d1c36 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -77,7 +77,7 @@ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) namespace ov { - +#define ENABLE_DEBUG_CAPS enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only @@ -205,7 +205,8 @@ class OPENVINO_RUNTIME_API PluginConfig { PluginConfig(PluginConfig&& other) = delete; PluginConfig& operator=(PluginConfig&& other) = delete; - void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = 
true); + void set_property(const ov::AnyMap& properties); + void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const; template diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index bebcd891251616..b756894d5e414f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -62,7 +62,18 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all return option->get_any(); } -void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + + for (auto& kv : config) { + auto& name = kv.first; + auto& val = kv.second; + + get_option_ptr(name)->set_any(val); + } +} + +void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); for (auto& kv : config) { @@ -131,12 +142,22 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_property(config_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : config_properties) { + std::cout << "Non default config value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_property(env_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : env_properties) { + std::cout << "Non default env value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(env_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index acee6df2288a74..2d820de1ef17cf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -309,7 +309,6 @@ struct program { // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 5cede62fd17e69..8a5fb44ba7e522 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -80,7 +80,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = 
false, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -138,8 +138,8 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } + bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -157,8 +157,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -173,7 +171,7 @@ class ProgramBuilder final { void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index aa6afc00b6ef70..2981f6beb002e1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -110,6 +110,7 @@ static constexpr Property disable_runtime_buff static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"}; static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 93b4653034ab92..9d63b6b15e3368 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -46,6 +46,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, " OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 
0 - Disabled, 4 - Maximum verbosity") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") @@ -63,11 +64,11 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set{}, "Space separated list of iterations where other dump options should be enabled") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measre and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asyncrhonously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 65acb0beb66ba0..6408bd1ab74fb7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,8 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + const auto& config = concat_node.get_config(); + GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } @@ -522,8 +522,7 @@ bool crop_in_place_optimization::match(const program_node& node, return false; if (node.get_users().size() > 0) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) { + GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) { return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 05f907dcd81f0a..fb92cef3aca7f2 100644 --- 
a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -55,6 +55,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -164,10 +167,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..78b494c52645de 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { // This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index b29be318593348..26359025d556ae 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -704,7 +704,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 423e879936cf0d..c9c1eadeaa6fac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -131,14 +131,6 @@ bool kernels_cache::is_cache_enabled() const { return !_config.get_cache_dir().empty(); } -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_max_kernels_per_batch(); -} - void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::BuildAll::GetProgramSource"); std::map>> program_buckets; @@ -201,7 +193,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch 
when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= _config.get_max_kernels_per_batch() || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -243,9 +235,8 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + dump_sources_dir = _config.get_dump_sources_path(); } // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines @@ -301,10 +292,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co bool dump_sources = batch.dump_custom_program; std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; + dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -379,7 +369,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
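[With max_kernels_per_batch promoted from a debug_config knob to a release-internal config option, the batching granularity becomes tunable through the regular config path. A hedged sketch of how a test or debugging session could use it, assuming a matching ov::intel_gpu::max_kernels_per_batch Property declaration in internal_properties.hpp:

    // Force one kernel per OpenCL program to isolate a miscompiled kernel;
    // as the comment above notes, expect much longer total compile time.
    ExecutionConfig config;
    config.set_property(ov::intel_gpu::max_kernels_per_batch(1));
    // kernels_cache then reads the value via _config.get_max_kernels_per_batch()
    // when deciding where to cut a new batch_program, as in the hunk above.
]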
std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index b08b087c55854a..98f65feea7bd4f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -103,7 +103,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 9e0a3fa5cfb390..64b92a15d1f4ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index 62129866927ea4..adc96db374b44e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 9a463f6f98291e..93dc37320336f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -51,8 +51,7 @@ struct typed_primitive_onednn_impl : public 
typed_primitive_impl { _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -71,8 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _pd(), _prim() { _enable_profiling = config.get_enable_profiling(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index ad08c516e939d8..9fd3c7a0caaf30 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 8b8c4e6b0b6e97..4e37b7df923a56 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index d1bbaa8a34cb8f..bc7d0fdfc67502 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; 
} + const ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 0ed7ce4d2dadd8..b965ad7e59a34e 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -237,9 +237,8 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { + dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 796442fe5d7ad5..0fabfa386454c5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1073,8 +1073,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1568,8 +1567,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1678,8 +1676,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1972,8 +1969,7 @@ void primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2307,8 +2303,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 98979c9edb3211..fc1c75818b26f0 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -238,12 +238,7 @@ void program::init_program() { 
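[Nearly every hunk in this stretch follows the same pattern: the process-wide debug_config singleton is dropped in favor of the program's or network's own finalized config, so debug knobs become per-model state rather than global state. Schematically, taken from the primitive_inst.cpp changes above:

    // Before: one global singleton shared by every network in the process.
    GPU_DEBUG_GET_INSTANCE(debug_config);
    GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
        return;
    }

    // After: the knob travels with the configuration the program was built with.
    GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) {
        return;
    }

A practical consequence is that two networks compiled in one process with different configs should no longer share dump or profiling settings.]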
_layout_optimizer = cldnn::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = cldnn::make_unique(impls_cache_capacity); + _impls_cache = cldnn::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -546,26 +541,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 5161887b79e57a..d0efb5a3f9d690 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1821,8 +1821,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 0774c62add1643..6cb5e47603c297 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -66,27 +66,6 @@ static bool is_weight_dyn_quantizable(const fully_connected_params& params) { // DYNAMIC_QUANTIZE static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. 
Enable Dynamic-quantize." << std::endl;
-            } else {
-                dynamic_quantization_group_size = 0;
-            }
-        }
-    }
-
     const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v;
     for (auto group_size : available_quantize_grp_size) {
         if (dynamic_quantization_group_size >= group_size) {
diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp
index 1d0c4f86eb8b48..8f32857f4cf9da 100644
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -69,7 +69,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con
     : m_context(context)
     , m_config(config)
     , m_stream_id(stream_id) {
-    auto program_builder = std::make_shared(model, get_engine(), config, false);
+    auto program_builder = std::make_shared(model, get_engine(), config);
     m_config = program_builder->get_config();
 
     build(program_builder->get_compiled_program());
@@ -213,12 +213,12 @@ void Graph::build(std::shared_ptr program) {
         exit(0);
     }
 
-    GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) {
+    GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) {
         static int net_id = 0;
         auto steps_info = get_network()->get_optimizer_passes_info();
         size_t step_idx = 0;
         for (auto& step : steps_info) {
-            auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml";
+            auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml";
             ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true));
             step_idx++;
         }
diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp
index 617b92cb0d7ebe..03489d6513e1ee 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp
@@ -27,7 +27,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_
     config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer()));
     config.finalize(p.get_engine());
 
-    ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true);
+    ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true);
     branch.inner_program = prog.get_compiled_program();
 
     auto& input_map = branch.input_map;
diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp
index c9804f9e75f84f..53c5896b21710b 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp
@@ -300,11 +300,10 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& op) {
+    if (op->is_dynamic()) {
+        return true;
+    }
+
+    // HACK: SearchSorted has specific shape requirements.
+    // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
+    // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
+    if (ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op)) {
+        const auto body_function = std::static_pointer_cast(op)->get_function();
+        if (body_function->is_dynamic())
+            return true;
+    }
+
+    if (ov::is_type(op) || ov::is_type(op)) {
+        return true;
+    }
+    // When an input node has a dynamic shape with 4 dimensions, this function may still return false,
+    // because op->is_dynamic() only checks input shapes.
+    // So, in the case of input data, we need to check the output shapes as well.
+    for (size_t i = 0; i < op->get_output_size(); i++) {
+        if (op->get_output_partial_shape(i).is_dynamic())
+            return true;
+    }
+
+    for (size_t i = 0; i < op->get_output_size(); i++) {
+        if (op->get_output_partial_shape(i).size() > 6)
+            return true;
+    }
+
+    for (size_t i = 0; i < op->get_input_size(); i++) {
+        if (op->get_input_partial_shape(i).size() > 6)
+            return true;
+    }
+
+    return false;
+}
+
+void set_model_properties(const ov::Model& model, ExecutionConfig& config) {
+    const auto& ops = model.get_ordered_ops();
+    // In the case of an inner program, the allow_new_shape_infer flag is set from outside of the program.
+    // So, do not check allow_new_shape_infer for an inner program build.
+    for (const auto& op : ops) {
+        if (requires_new_shape_infer(op)) {
+            config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+            break;
+        }
+    }
+    bool is_dynamic = false;
+    for (const auto& op : ops) {
+        if (op->is_dynamic()) {
+            is_dynamic = true;
+            break;
+        }
+    }
+    bool has_lstm = false;
+    for (const auto& op : ops) {
+        if (ov::is_type(op)) {
+            has_lstm = true;
+            break;
+        }
+    }
+
+    // In the case of dynamic models, because most of the layers are mapped to shape-agnostic kernels,
+    // fewer kernels are built compared to static models.
+    // So having a smaller batch size is even better for dynamic models, as we can do more parallel builds.
+    if (is_dynamic) {
+        config.set_property(ov::intel_gpu::max_kernels_per_batch(4));
+    } else {
+        config.set_property(ov::intel_gpu::max_kernels_per_batch(8));
+    }
+
+    config.set_property(ov::intel_gpu::optimize_data(true));
+
+    if (has_lstm)
+        config.set_property(ov::intel_gpu::use_onednn(true));
+}
+
 }  // namespace
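The intended call order on the plugin side is worth spelling out: set_model_properties() only works as designed when it runs after the user properties are applied and before finalize() resolves the remaining defaults. A minimal sketch of that flow, assembled from the hunks below (names are taken from this patch; the surrounding device_id/context/model variables are assumed to be the plugin's usual ones):

    // Hedged sketch of the expected property flow, not a verbatim excerpt:
    ExecutionConfig config = m_configs_map.at(device_id);               // per-device defaults
    config.set_user_property(orig_config, OptionVisibility::RELEASE);   // user options, validated against visibility
    set_model_properties(*model, config);                               // model-derived hints: new shape infer, kernel batching, oneDNN for LSTM
    config.finalize(context, get_rt_info(*model));                      // resolve AUTO values once, after all inputs are known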
#define FACTORY_DECLARATION(op_version, op_name) \
@@ -114,14 +203,13 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p
                                                         const ExecutionConfig& config,
                                                         const std::shared_ptr& context) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model");
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model");
 
     auto cloned_model = model->clone();
     OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!");
 
-    GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
-        auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name();
+    GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) {
+        auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name();
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }
 
@@ -140,8 +228,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p
         new_res->set_friendly_name(old_res->get_friendly_name());
     }
 
-    GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
-        auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func";
+    GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) {
+        auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func";
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }
     return cloned_model;
@@ -189,7 +277,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id);
 
     ExecutionConfig config = m_configs_map.at(device_id);
-    config.set_property(orig_config, OptionVisibility::RELEASE);
+    config.set_user_property(orig_config, OptionVisibility::RELEASE);
+    set_model_properties(*model, config);
     config.finalize(context, get_rt_info(*model));
 
     auto transformed_model = clone_and_transform_model(model, config, context);
@@ -208,6 +297,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id);
 
     ExecutionConfig config = m_configs_map.at(device_id);
+    set_model_properties(*model, config);
     config.finalize(context_impl, get_rt_info(*model));
 
     auto transformed_model = clone_and_transform_model(model, config, context_impl);
@@ -238,7 +328,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params)
 void Plugin::set_property(const ov::AnyMap &config) {
     auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) {
-        config.set_property(user_config, OptionVisibility::RELEASE);
+        config.set_user_property(user_config, OptionVisibility::RELEASE);
         // Check that custom layers config can be loaded
         if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) {
             CustomLayerMap custom_layers;
@@ -273,7 +363,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr&
     auto ctx = get_default_context(device_id);
 
     ExecutionConfig config = m_configs_map.at(device_id);
-    config.set_property(orig_config, OptionVisibility::RELEASE);
+    config.set_user_property(orig_config, OptionVisibility::RELEASE);
+    set_model_properties(*model, config);
     config.finalize(ctx, get_rt_info(*model));
 
     ProgramBuilder prog(ctx->get_engine(), config);
@@ -328,7 +419,7 @@ std::shared_ptr Plugin::import_model(std::istream& model,
     }
 
     ExecutionConfig config = m_configs_map.at(device_id);
-    config.set_property(_orig_config, OptionVisibility::RELEASE);
+    config.set_user_property(_orig_config, OptionVisibility::RELEASE);
     config.finalize(context_impl, {});
 
     ov::CacheMode cache_mode = config.get_cache_mode();
@@ -648,7 +739,9 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
     auto device_id = get_property(ov::device::id.name(), options).as();
     auto context = get_default_contexts().at(device_id);
     const auto& device_info = context->get_engine().get_device_info();
-    const auto& config = m_configs_map.at(device_id);
+    auto config = m_configs_map.at(device_id);
+    config.set_property(ov::intel_gpu::partial_build_program(true));
+    config.finalize(context, {});
     uint32_t n_streams = static_cast(config.get_num_streams());
     uint64_t occupied_device_mem = 0;
     auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>();
@@ -766,7 +859,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
         TransformationsPipeline transformations(config, context);
         transformations.apply(cloned_model);
 
-        program = std::make_shared(cloned_model, engine, config, true);
+        program = std::make_shared(cloned_model, engine, config);
         std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage();
         if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) {
             return static_cast(max_batch_size);
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index 5f92cd8c306cb4..449bd3d7d4fe0c 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -12,7 +12,7 @@
 #include "openvino/op/loop.hpp"
 #include "openvino/op/search_sorted.hpp"
 #include "openvino/runtime/properties.hpp"
-#include "ov_ops/dynamic_quantize.hpp"
+
 #include "intel_gpu/plugin/common_utils.hpp"
 #include "intel_gpu/plugin/program_builder.hpp"
@@ -63,7 +63,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) {
 }
 
 ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config,
-                               bool partial_build,
                                std::shared_ptr task_executor,
                                std::shared_ptr compilation_context,
                                bool is_inner_program)
@@ -111,7 +110,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine&
     CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty());
 
     auto ops = model->get_ordered_ops();
-    m_program = build(ops, partial_build, is_inner_program);
+    m_program = build(ops, is_inner_program);
 }
 
 ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config)
@@ -141,43 +140,8 @@ void ProgramBuilder::cleanup_build() {
 #endif
 }
 
-std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) {
+std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build");
-    // In the case of inner program, allow_new_shape_infer flag is setted by outside of program.
- // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - bool is_dynamic = false; - for (const auto& op : ops) { - if (op->is_dynamic()) { - is_dynamic = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); - } - - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (is_dynamic) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));; - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));; - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); - //if (has_lstm) - m_config.set_property(ov::intel_gpu::use_onednn(true)); - m_config.finalize(m_engine); prepare_build(); { @@ -223,7 +187,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -280,7 +243,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); @@ -364,50 +327,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptradd_primitive(prim); } -bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr& op) const { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 69e4a041e77f65..46fe4acc2a9955 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -310,15 +310,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 7b365ab7164ba7..12f8198750c4e4 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -17,7 +17,7 @@ namespace ov { namespace intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; @@ -56,7 +56,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym) { + if (asymmetric) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index b5d956f7872b5c..f56859d506a4e4 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -12,7 +12,7 @@ namespace intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: 
    OPENVINO_RTTI("DynamicQuantizeFullyConnected", "0");
-    DynamicQuantizeFullyConnected(uint64_t group_size);
+    DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false);
 };
 
 }   // namespace intel_gpu
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 67f87fabd6a5dc..61f8b838de363d 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -917,18 +917,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
         manager.register_pass();
         manager.register_pass();
 
-        bool disable_horizontal_fc_fusion = false;
-        bool disable_fc_swiglu_fusion = false;
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1)
-            disable_horizontal_fc_fusion = true;
-        GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1)
-            disable_fc_swiglu_fusion = true;
+        bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion();
+        bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion();
+
+        // MLP fusion is only supported for cldnn on high-performance GPUs
         bool fuse_mlp_swiglu = !device_info.supports_immad &&
                                device_info.execution_units_count >= 128 &&
                                !disable_fc_swiglu_fusion;
-        if (!disable_horizontal_fc_fusion)
+        if (!config.get_disable_horizontal_fc_fusion())
             manager.register_pass(fuse_mlp_swiglu);
 
         // ZP should not be folded for FC. But still, ZP should be folded for Gather.
@@ -981,6 +977,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
         manager.register_pass();
 
         if (device_info.supports_immad) {
+            bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization();
             auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size();
             pass_config->set_callback([=](const_node_ptr& root) -> bool {
                 if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) {
@@ -996,7 +993,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
                 }
 
                 // AZP does not support 8bit weight
-                GPU_DEBUG_IF(debug_config->dynamic_quantize_asym
+                GPU_DEBUG_IF(asymmetric_dyn_quant
                     && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) {
                     GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl;
                     return true;
@@ -1012,7 +1009,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
                 }
                 return false;
             });
-            manager.register_pass(dynamic_quantization_group_size);
+            manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant);
         }
 
         // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero.
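With the asymmetric flag resolved from the finalized config and passed through the constructor, the pass no longer consults process-global debug state. A minimal sketch of the injection pattern this hunk establishes (variable names follow the patch; the template argument is restored here for readability):

    // Hedged sketch, not a verbatim excerpt:
    bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization();    // read once from the finalized config
    auto group_size = config.get_dynamic_quantization_group_size();
    // The flag travels by value into the pass, so nothing inside the pass touches
    // the debug_config singleton anymore and per-compilation configs stay isolated.
    manager.register_pass<DynamicQuantizeFullyConnected>(group_size, asymmetric_dyn_quant);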
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index b4921fb8e16fb3..4876af965991a8 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -69,7 +69,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { m_enable_lp_transformations = info.supports_imad || info.supports_immad; } - if (info.supports_immad) { + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } if (get_use_onednn()) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 74dbc016c65d31..4826e502aeaa97 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex GPU_DEBUG_INFO << "GPU version: " << static_cast(info.gfx_ver.major) << "." << static_cast(info.gfx_ver.minor) << "." << static_cast(info.gfx_ver.revision) << (info.has_separate_cache ? " with separate cache" : "") << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn) - info.supports_immad = false; } else if (nv_device_attr_supported) { info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()), From 53ac5aac1518d20052da6f46ae3e58da15bc439e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:13:09 +0400 Subject: [PATCH 16/18] extended bool any parsing options Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 346819eced93e5..36e6a74880e115 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,7 @@ #include #include +#include "openvino/util/common_util.hpp" namespace { template bool contains_type_index(Container&& types, const std::type_info& user_type) { @@ -200,9 +201,14 @@ namespace util { void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - if (str == "YES") { + + std::set off = {"0", "false", "off", "no"}; + std::set on = {"1", "true", "on", "yes"}; + str = util::to_lower(str); + + if (on.count(str)) { value = true; - } else if (str == "NO") { + } else if (off.count(str)) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); From aaf1bb8f52b60d65d640ba2430b628a87a3e0ea5 Mon Sep 17 00:00:00 2001 From: dmitrygo Date: Tue, 24 Dec 2024 10:40:47 +0400 Subject: [PATCH 17/18] [CPU] New plugin config impl --- src/inference/CMakeLists.txt | 8 +- .../openvino/runtime/plugin_config.hpp | 16 +- .../openvino/runtime/intel_cpu/properties.hpp | 3 +- .../include/openvino/runtime/properties.hpp | 2 +- src/inference/src/dev/plugin_config.cpp | 9 +- src/plugins/intel_cpu/CMakeLists.txt | 2 +- src/plugins/intel_cpu/src/compiled_model.cpp | 95 +- src/plugins/intel_cpu/src/compiled_model.h | 2 + src/plugins/intel_cpu/src/config.cpp | 866 +++++++++--------- src/plugins/intel_cpu/src/config.h | 221 ++--- src/plugins/intel_cpu/src/config_new.cpp | 659 +++++++++++++ src/plugins/intel_cpu/src/config_new.hpp | 72 ++ .../intel_cpu/src/cpu_streams_calculation.cpp | 41 +- src/plugins/intel_cpu/src/graph.cpp | 14 +- src/plugins/intel_cpu/src/graph_context.cpp | 2 +- 
src/plugins/intel_cpu/src/graph_dumper.cpp | 10 +- src/plugins/intel_cpu/src/graph_dumper.h | 4 +- .../intel_cpu/src/internal_properties.hpp | 22 + .../intel_cpu/src/nodes/fullyconnected.cpp | 4 +- src/plugins/intel_cpu/src/nodes/llm_mlp.cpp | 2 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 1 + src/plugins/intel_cpu/src/nodes/qkv_proj.cpp | 2 +- .../intel_cpu/src/nodes/scaled_attn.cpp | 2 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 8 +- src/plugins/intel_cpu/src/options.inl | 76 ++ src/plugins/intel_cpu/src/plugin.cpp | 266 +++--- src/plugins/intel_cpu/src/plugin.h | 12 +- src/plugins/intel_cpu/src/remote_context.cpp | 28 + src/plugins/intel_cpu/src/remote_context.hpp | 46 + .../convert_to_cpu_specific_opset.hpp | 2 +- .../transformation_pipeline.cpp | 52 +- .../src/utils/ngraph_transformation.hpp | 38 +- .../intel_cpu/src/utils/node_dumper.cpp | 50 +- src/plugins/intel_cpu/src/utils/node_dumper.h | 8 +- .../intel_cpu/tests/functional/CMakeLists.txt | 2 +- .../functional/cmake/target_per_test.cmake | 1 + .../ov_executable_network/properties.cpp | 13 - 37 files changed, 1770 insertions(+), 891 deletions(-) create mode 100644 src/plugins/intel_cpu/src/config_new.cpp create mode 100644 src/plugins/intel_cpu/src/config_new.hpp create mode 100644 src/plugins/intel_cpu/src/options.inl create mode 100644 src/plugins/intel_cpu/src/remote_context.cpp create mode 100644 src/plugins/intel_cpu/src/remote_context.hpp diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 1358a843a1863e..485acb6c31c955 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -83,6 +83,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" $ + $ $<$:$> # for ov_plugins.hpp $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>) @@ -101,7 +102,7 @@ endif() # Create library file from object library add_library(${TARGET_NAME} INTERFACE) -target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime) +target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime openvino::shape_inference) target_include_directories(${TARGET_NAME} INTERFACE $) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_SOURCES ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${PUBLIC_HEADERS}) @@ -122,14 +123,15 @@ if (TBBBIND_2_5_FOUND) endif() target_include_directories(${TARGET_NAME}_s PUBLIC - $) + $ + $) if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} - openvino::runtime::dev openvino::pugixml) + openvino::runtime::dev openvino::pugixml openvino::shape_inference) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 08dd148b6d1c36..f8125083a2193b 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -10,6 +10,7 @@ #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" +#include #ifndef COUNT_N #define COUNT_N(_1, _2, _3, _4, _5, N, ...) 
    N
@@ -133,8 +134,10 @@ struct ConfigOption : public ConfigOptionBase {
     constexpr static const auto visibility = visibility_;
 
     void set_any(const ov::Any any) override {
-        if (validator)
+        if (validator) {
+            // TODO: is there any way to print the option name here?
             OPENVINO_ASSERT(validator(any.as()), "Invalid value: ", any.as());
+        }
         value = any.as();
     }
 
@@ -220,18 +223,19 @@ class OPENVINO_RUNTIME_API PluginConfig {
 
     bool visit_attributes(ov::AttributeVisitor& visitor);
 
-protected:
-    virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
-    virtual void apply_debug_options(std::shared_ptr context);
-    virtual void finalize_impl(std::shared_ptr context) {}
-
     template
     bool is_set_by_user(const ov::Property& property) const {
         return m_user_properties.find(property.name()) != m_user_properties.end();
     }
 
+protected:
+    virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
+    virtual void apply_debug_options(std::shared_ptr context);
+    virtual void finalize_impl(std::shared_ptr context) {}
+
     ConfigOptionBase* get_option_ptr(const std::string& name) const {
         auto it = m_options_map.find(name);
+        // TODO: print a more meaningful error message
         OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name);
         OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
 
diff --git a/src/inference/include/openvino/runtime/intel_cpu/properties.hpp b/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
index 9d63a0e078bdef..ac0365c3409190 100644
--- a/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
+++ b/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
@@ -12,6 +12,7 @@
 #pragma once
 
 #include "openvino/runtime/properties.hpp"
+#include "ov_optional.hpp"
 
 namespace ov {
 
@@ -45,7 +46,7 @@ namespace intel_cpu {
 *     ie.set_property(ov::denormals_optimization(false)); // disable denormals optimization
 * @endcode
 */
-static constexpr Property denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
+static constexpr Property> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
 
 /**
 * @brief This property defines threshold for sparse weights decompression feature activation
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 8baea3ed408656..cb7afc22774df3 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -1209,7 +1209,7 @@ namespace streams {
 * @ingroup ov_runtime_cpp_prop_api
 */
 struct Num {
-    using Base = std::tuple;  //!< NumStreams is representable as int32_t
+    // using Base = std::tuple;  //!< NumStreams is representable as int32_t
 
     constexpr Num() : num{-1} {};
 
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index b756894d5e414f..4ef8fe68c8c93f 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -78,7 +78,14 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility
 
     for (auto& kv : config) {
         auto& name = kv.first;
-        auto& val = kv.second;
+        auto val = kv.second;
+
+        // [WA] ov::Any cannot be cast from int to streams::Num
+        // Can be reproduced with CpuExecNetworkCheckModelStreamsHasHigherPriorityThanThroughputHint test
+        // Should be fixed before the merge
+        if (name == ov::num_streams.name()) {
+            val = val.as();
+        }
 
         auto option = get_option_ptr(name);
         if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
diff --git
a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index aa6ce49a051e00..0ecd4435507da4 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -373,11 +373,11 @@ if(BUILD_SHARED_LIBS) PRIVATE $ $ - $ $ $ PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src + $ $) target_include_directories(${TARGET_NAME}_obj SYSTEM PUBLIC $) diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index f81c7dbbced99d..ab7091d2988fd8 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -53,14 +53,15 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, m_cfg{cfg}, m_name{model->get_name()}, m_loaded_from_cache(loaded_from_cache), - m_sub_memory_manager(sub_memory_manager) { + m_sub_memory_manager(sub_memory_manager), + m_model_name(model->get_friendly_name()) { m_mutex = std::make_shared(); const auto& core = m_plugin->get_core(); if (!core) OPENVINO_THROW("Unable to get API version. Core is unavailable"); IStreamsExecutor::Config executor_confg; - if (cfg.exclusiveAsyncRequests) { + if (cfg.get_exclusive_async_requests()) { // special case when all InferRequests are muxed into a single queue m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU"); } else { @@ -156,7 +157,7 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const { GraphContext::Ptr ctx; { std::lock_guard lock{*m_mutex.get()}; - auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) && + auto isQuantizedFlag = (m_cfg.get_lp_transforms_mode()) && ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model); ctx = std::make_shared(m_cfg, @@ -219,16 +220,16 @@ ov::Any CompiledModel::get_property(const std::string& name) const { return m_loaded_from_cache; } - Config engConfig = get_graph()._graph.getConfig(); - auto option = engConfig._config.find(name); - if (option != engConfig._config.end()) { - return option->second; - } + // Config engConfig = get_graph()._graph.getConfig(); + // auto option = engConfig._config.find(name); + // if (option != engConfig._config.end()) { + // return option->second; + // } - // @todo Can't we just use local copy (_cfg) instead? - auto graphLock = get_graph(); - const auto& graph = graphLock._graph; - const auto& config = graph.getConfig(); + // // @todo Can't we just use local copy (_cfg) instead? + // auto graphLock = get_graph(); + // const auto& graph = graphLock._graph; + // const auto& config = graph.getConfig(); auto RO_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RO); @@ -266,78 +267,22 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } if (name == ov::model_name) { - // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name - const std::string modelName = graph.dump()->get_friendly_name(); - return decltype(ov::model_name)::value_type(modelName); + return decltype(ov::model_name)::value_type {m_model_name}; + } else if (name == ov::loaded_from_cache) { + return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - const auto streams = config.streamExecutorConfig.get_streams(); + const auto streams = m_cfg.streamExecutorConfig.get_streams(); return decltype(ov::optimal_number_of_infer_requests)::value_type( streams > 0 ? 
streams : 1); // ov::optimal_number_of_infer_requests has no negative values - } else if (name == ov::num_streams) { - const auto streams = config.streamExecutorConfig.get_streams(); - return decltype(ov::num_streams)::value_type( - streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (name == ov::affinity) { - const auto affinity = config.threadBindingType; - switch (affinity) { - case IStreamsExecutor::ThreadBindingType::NONE: - return ov::Affinity::NONE; - case IStreamsExecutor::ThreadBindingType::CORES: - return ov::Affinity::CORE; - case IStreamsExecutor::ThreadBindingType::NUMA: - return ov::Affinity::NUMA; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return ov::Affinity::HYBRID_AWARE; - } - return ov::Affinity::NONE; - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (name == ov::inference_num_threads) { - const auto num_threads = config.streamExecutorConfig.get_threads(); - return decltype(ov::inference_num_threads)::value_type(num_threads); - } else if (name == ov::enable_profiling.name()) { - const bool perfCount = config.collectPerfCounters; - return decltype(ov::enable_profiling)::value_type(perfCount); - } else if (name == ov::hint::inference_precision) { - return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision); - } else if (name == ov::hint::performance_mode) { - return decltype(ov::hint::performance_mode)::value_type(config.hintPerfMode); - } else if (name == ov::log::level) { - return decltype(ov::log::level)::value_type(config.logLevel); - } else if (name == ov::hint::enable_cpu_pinning.name()) { - const bool use_pin = config.enableCpuPinning; - return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin); - } else if (name == ov::hint::scheduling_core_type) { - const auto stream_mode = config.schedulingCoreType; - return stream_mode; - } else if (name == ov::hint::model_distribution_policy) { - const auto& distribution_policy = config.modelDistributionPolicy; - return distribution_policy; - } else if (name == ov::hint::enable_hyper_threading.name()) { - const bool use_ht = config.enableHyperThreading; - return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); - } else if (name == ov::hint::execution_mode) { - return config.executionMode; - } else if (name == ov::hint::num_requests) { - return decltype(ov::hint::num_requests)::value_type(config.hintNumRequests); } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()}; - } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(config.denormalsOptMode == - Config::DenormalsOptMode::DO_On); - } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( - config.fcSparseWeiDecompressionRate); - } else if (name == ov::hint::dynamic_quantization_group_size) { - return decltype(ov::hint::dynamic_quantization_group_size)::value_type(config.fcDynamicQuantizationGroupSize); - } else if (name == ov::hint::kv_cache_precision) { - return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision); } - OPENVINO_THROW("Unsupported property: ", name); + + return m_cfg.get_property(name); } void CompiledModel::export_model(std::ostream& modelStream) const { - ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt); + ModelSerializer serializer(modelStream, 
m_cfg.get_cache_encryption_callbacks().encrypt); serializer << m_model; } diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h index f7d2903b0526cf..e5251853fd95cb 100644 --- a/src/plugins/intel_cpu/src/compiled_model.h +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -98,6 +98,8 @@ class CompiledModel : public ov::ICompiledModel { std::vector> m_sub_compiled_models; std::shared_ptr m_sub_memory_manager = nullptr; bool m_has_sub_compiled_models = false; + + std::string m_model_name; }; // This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 7d1ee05897e81d..b2d6658bc598b1 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -21,456 +21,456 @@ namespace ov { namespace intel_cpu { -using namespace ov::threading; -using namespace dnnl::impl::cpu::x64; +// using namespace ov::threading; +// using namespace dnnl::impl::cpu::x64; -Config::Config() { - // this is default mode -#if defined(__APPLE__) || defined(_WIN32) - threadBindingType = IStreamsExecutor::NONE; -#else - threadBindingType = IStreamsExecutor::CORES; -#endif +// Config::Config() { +// // this is default mode +// #if defined(__APPLE__) || defined(_WIN32) +// threadBindingType = IStreamsExecutor::NONE; +// #else +// threadBindingType = IStreamsExecutor::CORES; +// #endif -// for the TBB code-path, additional configuration depending on the OS and CPU types -#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) -# if defined(__APPLE__) || defined(_WIN32) - // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default - auto numaNodes = get_available_numa_nodes(); - if (numaNodes.size() > 1) { - threadBindingType = IStreamsExecutor::NUMA; - } else { - threadBindingType = IStreamsExecutor::NONE; - } -# endif +// // for the TBB code-path, additional configuration depending on the OS and CPU types +// #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) +// # if defined(__APPLE__) || defined(_WIN32) +// // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default +// auto numaNodes = get_available_numa_nodes(); +// if (numaNodes.size() > 1) { +// threadBindingType = IStreamsExecutor::NUMA; +// } else { +// threadBindingType = IStreamsExecutor::NONE; +// } +// # endif - if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { - threadBindingType = IStreamsExecutor::HYBRID_AWARE; - } -#endif - CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); +// if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { +// threadBindingType = IStreamsExecutor::HYBRID_AWARE; +// } +// #endif +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); - updateProperties(); -} +// updateProperties(); +// } -#ifdef CPU_DEBUG_CAPS -/** - * Debug capabilities configuration has more priority than common one - * Some of the debug capabilities also require to enable some of common - * configuration properties - */ -void Config::applyDebugCapsProperties() { - // always enable perf counters for verbose, performance summary and average counters - if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) { - collectPerfCounters = true; - } -} -#endif +// #ifdef CPU_DEBUG_CAPS +// /** +// * Debug capabilities configuration has more priority than common one +// * Some of the debug capabilities also require to 
enable some of common +// * configuration properties +// */ +// void Config::applyDebugCapsProperties() { +// // always enable perf counters for verbose, performance summary and average counters +// if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) { +// collectPerfCounters = true; +// } +// } +// #endif -void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { - const auto streamExecutorConfigKeys = - streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); - for (const auto& kvp : prop) { - const auto& key = kvp.first; - const auto& val = kvp.second; - if (streamExecutorConfigKeys.end() != - std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { - streamExecutorConfig.set_property(key, val.as()); - streams = streamExecutorConfig.get_streams(); - threads = streamExecutorConfig.get_threads(); - threadsPerStream = streamExecutorConfig.get_threads_per_stream(); - if (key == ov::num_streams.name()) { - ov::Any value = val.as(); - auto streams_value = value.as(); - if (streams_value == ov::streams::NUMA) { - modelDistributionPolicy = {}; - hintPerfMode = ov::hint::PerformanceMode::LATENCY; - changedHintPerfMode = true; - } else if (streams_value == ov::streams::AUTO) { - hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; - changedHintPerfMode = true; - } else { - streamsChanged = true; - } - } - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (key == ov::affinity.name()) { - try { - changedCpuPinning = true; - ov::Affinity affinity = val.as(); -#if defined(__APPLE__) - enableCpuPinning = false; - threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE - : IStreamsExecutor::ThreadBindingType::NUMA; -#else - enableCpuPinning = - (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; - switch (affinity) { - case ov::Affinity::NONE: - threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; - break; - case ov::Affinity::CORE: { - threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; - } break; - case ov::Affinity::NUMA: - threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; - break; - case ov::Affinity::HYBRID_AWARE: - threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; - break; - default: - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } -#endif - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (key == ov::hint::performance_mode.name()) { - try { - hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); - } - } else if (key == ov::log::level.name()) { - try { - logLevel = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - key, - ". 
Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); - } - } else if (key == ov::hint::num_requests.name()) { - try { - ov::Any value = val.as(); - int val_i = value.as(); - if (val_i < 0) - OPENVINO_THROW("invalid value."); - hintNumRequests = static_cast(val_i); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::num_requests.name(), - ". Expected only > 0."); - } - } else if (key == ov::hint::enable_cpu_pinning.name()) { - try { - enableCpuPinning = val.as(); - changedCpuPinning = true; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::enable_cpu_pinning.name(), - ". Expected only true/false."); - } - } else if (key == ov::hint::scheduling_core_type.name()) { - try { - schedulingCoreType = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::scheduling_core_type.name(), - ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); - } - } else if (key == ov::hint::model_distribution_policy.name()) { - auto error_info = [&]() { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::model_distribution_policy.name(), - ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); - }; +// void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { +// const auto streamExecutorConfigKeys = +// streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); +// for (const auto& kvp : prop) { +// const auto& key = kvp.first; +// const auto& val = kvp.second; +// if (streamExecutorConfigKeys.end() != +// std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { +// streamExecutorConfig.set_property(key, val.as()); +// streams = streamExecutorConfig.get_streams(); +// threads = streamExecutorConfig.get_threads(); +// threadsPerStream = streamExecutorConfig.get_threads_per_stream(); +// if (key == ov::num_streams.name()) { +// ov::Any value = val.as(); +// auto streams_value = value.as(); +// if (streams_value == ov::streams::NUMA) { +// modelDistributionPolicy = {}; +// hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// changedHintPerfMode = true; +// } else if (streams_value == ov::streams::AUTO) { +// hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; +// changedHintPerfMode = true; +// } else { +// streamsChanged = true; +// } +// } +// OPENVINO_SUPPRESS_DEPRECATED_START +// } else if (key == ov::affinity.name()) { +// try { +// changedCpuPinning = true; +// ov::Affinity affinity = val.as(); +// #if defined(__APPLE__) +// enableCpuPinning = false; +// threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE +// : IStreamsExecutor::ThreadBindingType::NUMA; +// #else +// enableCpuPinning = +// (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? 
true : false; +// switch (affinity) { +// case ov::Affinity::NONE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; +// break; +// case ov::Affinity::CORE: { +// threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; +// } break; +// case ov::Affinity::NUMA: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; +// break; +// case ov::Affinity::HYBRID_AWARE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; +// break; +// default: +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// #endif +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// OPENVINO_SUPPRESS_DEPRECATED_END +// // } else if (key == ov::hint::performance_mode.name()) { +// // try { +// // hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // key, +// // ". Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); +// // } +// // } else if (key == ov::log::level.name()) { +// // try { +// // logLevel = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // key, +// // ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); +// // } +// // } else if (key == ov::hint::num_requests.name()) { +// // try { +// // ov::Any value = val.as(); +// // int val_i = value.as(); +// // if (val_i < 0) +// // OPENVINO_THROW("invalid value."); +// // hintNumRequests = static_cast(val_i); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::num_requests.name(), +// // ". Expected only > 0."); +// // } +// // } else if (key == ov::hint::enable_cpu_pinning.name()) { +// // try { +// // enableCpuPinning = val.as(); +// // changedCpuPinning = true; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::enable_cpu_pinning.name(), +// // ". Expected only true/false."); +// // } +// // } else if (key == ov::hint::scheduling_core_type.name()) { +// // try { +// // schedulingCoreType = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::scheduling_core_type.name(), +// // ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); +// // } +// // } else if (key == ov::hint::model_distribution_policy.name()) { +// // auto error_info = [&]() { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::model_distribution_policy.name(), +// // ". 
CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); +// // }; - try { - for (auto& row : val.as>()) { - if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { - error_info(); - } - } - modelDistributionPolicy = val.as>(); - } catch (ov::Exception&) { - error_info(); - } - } else if (key == ov::hint::enable_hyper_threading.name()) { - try { - enableHyperThreading = val.as(); - changedHyperThreading = true; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::enable_hyper_threading.name(), - ". Expected only true/false."); - } - } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { - float val_f = 0.0f; - try { - val_f = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", - ov::intel_cpu::sparse_weights_decompression_rate.name(), - ". Expected only float numbers"); - } - if (val_f < 0.f || val_f > 1.f) { - OPENVINO_THROW("Wrong value for property key ", - ov::intel_cpu::sparse_weights_decompression_rate.name(), - ". Sparse rate must be in range [0.0f,1.0f]"); - } else { - fcSparseWeiDecompressionRate = val_f; - } - } else if (key == ov::hint::dynamic_quantization_group_size.name()) { - try { - fcDynamicQuantizationGroupSizeSetExplicitly = true; - fcDynamicQuantizationGroupSize = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", - ov::hint::dynamic_quantization_group_size.name(), - ". Expected only unsinged integer numbers"); - } - } else if (key == ov::enable_profiling.name()) { - try { - collectPerfCounters = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::enable_profiling.name(), - ". Expected only true/false"); - } - } else if (key == ov::internal::exclusive_async_requests.name()) { - try { - exclusiveAsyncRequests = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::internal::exclusive_async_requests.name(), - ". Expected only true/false"); - } - } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { - try { - lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - key, - ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); - } - } else if (key == ov::device::id.name()) { - device_id = val.as(); - if (!device_id.empty()) { - OPENVINO_THROW("CPU plugin supports only '' as device id"); - } - } else if (key == ov::hint::inference_precision.name()) { - try { - auto const prec = val.as(); - inferencePrecisionSetExplicitly = true; - if (prec == ov::element::bf16) { - if (hasHardwareSupport(ov::element::bf16)) { - inferencePrecision = ov::element::bf16; - } - } else if (prec == ov::element::f16) { - if (hasHardwareSupport(ov::element::f16)) { - inferencePrecision = ov::element::f16; - } - } else if (one_of(prec, element::f32, element::undefined)) { - inferencePrecision = prec; - } else { - OPENVINO_THROW("invalid value"); - } - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::hint::inference_precision.name(), - ". 
Supported values: bf16, f16, f32, undefined"); - } - } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { - int val_i = -1; - try { - ov::Any value = val.as(); - val_i = value.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::cpu_runtime_cache_capacity.name(), - ". Expected only integer numbers"); - } - // any negative value will be treated - // as zero that means disabling the cache - rtCacheCapacity = std::max(val_i, 0); - } else if (ov::intel_cpu::denormals_optimization.name() == key) { - try { - denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; - } catch (ov::Exception&) { - denormalsOptMode = DenormalsOptMode::DO_Keep; - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::denormals_optimization.name(), - ". Expected only true/false"); - } - } else if (key == ov::intel_cpu::snippets_mode.name()) { - try { - auto const mode = val.as(); - if (mode == ov::intel_cpu::SnippetsMode::ENABLE) - snippetsMode = SnippetsMode::Enable; - else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) - snippetsMode = SnippetsMode::IgnoreCallback; - else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) - snippetsMode = SnippetsMode::Disable; - else - OPENVINO_THROW("invalid value"); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::snippets_mode.name(), - ". Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); - } - } else if (key == ov::hint::execution_mode.name()) { - try { - executionMode = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::execution_mode.name(), - ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); - } - } else if (key == ov::hint::kv_cache_precision.name()) { - try { - kvCachePrecisionSetExplicitly = true; - auto const prec = val.as(); - if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { - kvCachePrecision = prec; - } else { - OPENVINO_THROW("invalid value"); - } - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::hint::kv_cache_precision.name(), - ". 
Supported values: u8, bf16, f16, f32"); - } - } else if (key == ov::cache_encryption_callbacks.name()) { - try { - auto encryption_callbacks = val.as(); - cacheEncrypt = encryption_callbacks.encrypt; - cacheDecrypt = encryption_callbacks.decrypt; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); - } - } else if (key == ov::internal::caching_with_mmap.name()) { - } else { - OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); - } - } - // apply execution mode after all the params are handled to prevent possible conflicts - // when both execution_mode and inference_precision are specified - if (!inferencePrecisionSetExplicitly) { - if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { - inferencePrecision = ov::element::f32; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) - if (hasHardwareSupport(ov::element::f16)) { - inferencePrecision = ov::element::f16; - } -#endif - if (mayiuse(avx512_core_bf16)) - inferencePrecision = ov::element::bf16; - } else { - inferencePrecision = ov::element::undefined; - } - } - // enable ACL fast math in PERFORMANCE mode -#if defined(OV_CPU_WITH_ACL) - if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { - aclFastMath = true; - } -#endif - // disable dynamic quantization and kv quantization for best accuracy - if (executionMode == ov::hint::ExecutionMode::ACCURACY) { - if (!fcDynamicQuantizationGroupSizeSetExplicitly) { - fcDynamicQuantizationGroupSize = 0; - } - if (!kvCachePrecisionSetExplicitly) { - kvCachePrecision = ov::element::f32; - } - } +// // try { +// // for (auto& row : val.as>()) { +// // if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { +// // error_info(); +// // } +// // } +// // modelDistributionPolicy = val.as>(); +// // } catch (ov::Exception&) { +// // error_info(); +// // } +// // } else if (key == ov::hint::enable_hyper_threading.name()) { +// // try { +// // enableHyperThreading = val.as(); +// // changedHyperThreading = true; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::enable_hyper_threading.name(), +// // ". Expected only true/false."); +// // } +// // } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { +// // float val_f = 0.0f; +// // try { +// // val_f = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::intel_cpu::sparse_weights_decompression_rate.name(), +// // ". Expected only float numbers"); +// // } +// // if (val_f < 0.f || val_f > 1.f) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::intel_cpu::sparse_weights_decompression_rate.name(), +// // ". Sparse rate must be in range [0.0f,1.0f]"); +// // } else { +// // fcSparseWeiDecompressionRate = val_f; +// // } +// // } else if (key == ov::hint::dynamic_quantization_group_size.name()) { +// // try { +// // fcDynamicQuantizationGroupSizeSetExplicitly = true; +// // fcDynamicQuantizationGroupSize = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::hint::dynamic_quantization_group_size.name(), +// // ". 
Expected only unsinged integer numbers"); +// // } +// // } else if (key == ov::enable_profiling.name()) { +// // try { +// // collectPerfCounters = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::enable_profiling.name(), +// // ". Expected only true/false"); +// // } +// // } else if (key == ov::internal::exclusive_async_requests.name()) { +// // try { +// // exclusiveAsyncRequests = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::internal::exclusive_async_requests.name(), +// // ". Expected only true/false"); +// // } +// // } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { +// // try { +// // lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // key, +// // ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); +// // } +// // } else if (key == ov::device::id.name()) { +// // device_id = val.as(); +// // if (!device_id.empty()) { +// // OPENVINO_THROW("CPU plugin supports only '' as device id"); +// // } +// // } else if (key == ov::hint::inference_precision.name()) { +// // try { +// // auto const prec = val.as(); +// // inferencePrecisionSetExplicitly = true; +// // if (prec == ov::element::bf16) { +// // if (hasHardwareSupport(ov::element::bf16)) { +// // inferencePrecision = ov::element::bf16; +// // } +// // } else if (prec == ov::element::f16) { +// // if (hasHardwareSupport(ov::element::f16)) { +// // inferencePrecision = ov::element::f16; +// // } +// // } else if (one_of(prec, element::f32, element::undefined)) { +// // inferencePrecision = prec; +// // } else { +// // OPENVINO_THROW("invalid value"); +// // } +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::hint::inference_precision.name(), +// // ". Supported values: bf16, f16, f32, undefined"); +// // } +// // } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { +// // int val_i = -1; +// // try { +// // ov::Any value = val.as(); +// // val_i = value.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::cpu_runtime_cache_capacity.name(), +// // ". Expected only integer numbers"); +// // } +// // // any negative value will be treated +// // // as zero that means disabling the cache +// // rtCacheCapacity = std::max(val_i, 0); +// // } else if (ov::intel_cpu::denormals_optimization.name() == key) { +// // try { +// // denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; +// // } catch (ov::Exception&) { +// // denormalsOptMode = DenormalsOptMode::DO_Keep; +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::denormals_optimization.name(), +// // ". 
Expected only true/false"); +// // } +// // } else if (key == ov::intel_cpu::snippets_mode.name()) { +// // try { +// // auto const mode = val.as(); +// // if (mode == ov::intel_cpu::SnippetsMode::ENABLE) +// // snippetsMode = SnippetsMode::Enable; +// // else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) +// // snippetsMode = SnippetsMode::IgnoreCallback; +// // else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) +// // snippetsMode = SnippetsMode::Disable; +// // else +// // OPENVINO_THROW("invalid value"); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::snippets_mode.name(), +// // ". Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); +// // } +// // } else if (key == ov::hint::execution_mode.name()) { +// // try { +// // executionMode = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::execution_mode.name(), +// // ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); +// // } +// // } else if (key == ov::hint::kv_cache_precision.name()) { +// // try { +// // kvCachePrecisionSetExplicitly = true; +// // auto const prec = val.as(); +// // if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { +// // kvCachePrecision = prec; +// // } else { +// // OPENVINO_THROW("invalid value"); +// // } +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::hint::kv_cache_precision.name(), +// // ". Supported values: u8, bf16, f16, f32"); +// // } +// // } else if (key == ov::cache_encryption_callbacks.name()) { +// // try { +// // auto encryption_callbacks = val.as(); +// // cacheEncrypt = encryption_callbacks.encrypt; +// // cacheDecrypt = encryption_callbacks.decrypt; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); +// // } +// // } else if (key == ov::internal::caching_with_mmap.name()) { +// } else { +// OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); +// } +// } +// // // apply execution mode after all the params are handled to prevent possible conflicts +// // // when both execution_mode and inference_precision are specified +// // if (!inferencePrecisionSetExplicitly) { +// // if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// // inferencePrecision = ov::element::f32; +// // #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +// // if (hasHardwareSupport(ov::element::f16)) { +// // inferencePrecision = ov::element::f16; +// // } +// // #endif +// // if (mayiuse(avx512_core_bf16)) +// // inferencePrecision = ov::element::bf16; +// // } else { +// // inferencePrecision = ov::element::undefined; +// // } +// // } +// // enable ACL fast math in PERFORMANCE mode +// // #if defined(OV_CPU_WITH_ACL) +// // if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// // aclFastMath = true; +// // } +// // #endif +// // disable dynamic quantization and kv quantization for best accuracy +// if (executionMode == ov::hint::ExecutionMode::ACCURACY) { +// if (!fcDynamicQuantizationGroupSizeSetExplicitly) { +// fcDynamicQuantizationGroupSize = 0; +// } +// if (!kvCachePrecisionSetExplicitly) { +// kvCachePrecision = ov::element::f32; +// } +// } - if (!prop.empty()) - _config.clear(); +// if (!prop.empty()) +// 
_config.clear(); - if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams - streams = 1; - streamsChanged = true; - } +// if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams +// streams = 1; +// streamsChanged = true; +// } -#if defined(OV_CPU_WITH_SHL) - // TODO: multi-stream execution is unsafe when SHL is used: - // The library uses global static variables as flags and counters. - streams = 1; - streamsChanged = true; -#endif +// #if defined(OV_CPU_WITH_SHL) +// // TODO: multi-stream execution is unsafe when SHL is used: +// // The library uses global static variables as flags and counters. +// streams = 1; +// streamsChanged = true; +// #endif - this->modelType = modelType; +// this->modelType = modelType; - CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); - updateProperties(); -} +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); +// updateProperties(); +// } -void Config::updateProperties() { - if (!_config.empty()) - return; +// void Config::updateProperties() { +// if (!_config.empty()) +// return; - if (collectPerfCounters == true) - _config.insert({ov::enable_profiling.name(), "YES"}); - else - _config.insert({ov::enable_profiling.name(), "NO"}); - if (exclusiveAsyncRequests == true) - _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); - else - _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); +// if (collectPerfCounters == true) +// _config.insert({ov::enable_profiling.name(), "YES"}); +// else +// _config.insert({ov::enable_profiling.name(), "NO"}); +// if (exclusiveAsyncRequests == true) +// _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); +// else +// _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); - _config.insert({ov::device::id.name(), device_id}); +// _config.insert({ov::device::id.name(), device_id}); - _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); - _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); -} +// _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); +// _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); +// } -void Config::applyRtInfo(const std::shared_ptr& model) { - // if user sets explicitly, it will be higher priority than rt_info - if (!kvCachePrecisionSetExplicitly && - model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { - this->kvCachePrecision = - model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); - } - if (!fcDynamicQuantizationGroupSizeSetExplicitly && - model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { - this->fcDynamicQuantizationGroupSize = - model->get_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()}); - } -} +// void Config::applyRtInfo(const std::shared_ptr& model) { +// // if user sets explicitly, it will be higher priority than rt_info +// if (!kvCachePrecisionSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { +// this->kvCachePrecision = +// model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); +// } +// if (!fcDynamicQuantizationGroupSizeSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { +// this->fcDynamicQuantizationGroupSize = +// model->get_rt_info({"runtime_options", 
ov::hint::dynamic_quantization_group_size.name()}); +// } +// } } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 1aa08f4412f0b3..22808caff1f8f5 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -4,118 +4,121 @@ #pragma once -#include -#include -#include - -#include "internal_properties.hpp" -#include "openvino/core/type/element_type.hpp" -#include "openvino/runtime/properties.hpp" -#include "openvino/runtime/threading/istreams_executor.hpp" -#include "openvino/util/common_util.hpp" -#include "utils/debug_caps_config.h" +// #include +// #include +// #include + +// #include "internal_properties.hpp" +// #include "openvino/core/type/element_type.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "openvino/runtime/threading/istreams_executor.hpp" +// #include "openvino/util/common_util.hpp" +// #include "utils/debug_caps_config.h" +#include "config_new.hpp" namespace ov { namespace intel_cpu { -struct Config { - Config(); - - enum LPTransformsMode { - Off, - On, - }; - - enum DenormalsOptMode { - DO_Keep, - DO_Off, - DO_On, - }; - - enum SnippetsMode { - Enable, - IgnoreCallback, - Disable, - }; - - enum class ModelType { CNN, LLM, Unknown }; - - bool collectPerfCounters = false; - bool exclusiveAsyncRequests = false; - SnippetsMode snippetsMode = SnippetsMode::Enable; - std::string dumpToDot = {}; - std::string device_id = {}; - float fcSparseWeiDecompressionRate = 1.0f; - uint64_t fcDynamicQuantizationGroupSize = 32; - bool fcDynamicQuantizationGroupSizeSetExplicitly = false; - bool kvCachePrecisionSetExplicitly = false; -#if defined(OV_CPU_WITH_ACL) - bool aclFastMath = false; -#endif -#if defined(OPENVINO_ARCH_X86_64) - ov::element::Type kvCachePrecision = ov::element::u8; - size_t rtCacheCapacity = 5000ul; -#else - ov::element::Type kvCachePrecision = ov::element::f16; - // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives - size_t rtCacheCapacity = 0ul; -#endif - ov::threading::IStreamsExecutor::Config streamExecutorConfig; - int streams = 1; - bool streamsChanged = false; - int threads = 0; - int threadsPerStream = 0; - ov::threading::IStreamsExecutor::ThreadBindingType threadBindingType = - ov::threading::IStreamsExecutor::ThreadBindingType::NONE; - ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY; - std::vector> streamsRankTable; - bool changedHintPerfMode = false; - ov::log::Level logLevel = ov::log::Level::NO; - uint32_t hintNumRequests = 0; - bool enableCpuPinning = true; - bool changedCpuPinning = false; - ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - std::set modelDistributionPolicy = {}; - int streamsRankLevel = 1; - int numSubStreams = 0; - bool enableNodeSplit = false; - bool enableHyperThreading = true; - bool changedHyperThreading = false; -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - LPTransformsMode lpTransformsMode = LPTransformsMode::On; -#else - // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
- LPTransformsMode lpTransformsMode = LPTransformsMode::Off; -#endif - // default inference precision - ov::element::Type inferencePrecision = ov::element::f32; - bool inferencePrecisionSetExplicitly = false; - ov::hint::ExecutionMode executionMode = ov::hint::ExecutionMode::PERFORMANCE; - - DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; - - // The denormals-are-zeros flag was introduced in the Pentium 4 and Intel Xeon processor - // In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag (bit 6) - // is reserved. - bool DAZOn = false; - - void readProperties(const ov::AnyMap& config, const ModelType modelType = ModelType::Unknown); - - void updateProperties(); - - void applyRtInfo(const std::shared_ptr& model); - - std::map _config; - - int modelPreferThreads = -1; - ModelType modelType = ModelType::Unknown; - std::function cacheEncrypt; - std::function cacheDecrypt; - -#ifdef CPU_DEBUG_CAPS - DebugCapsConfig debugCaps; - void applyDebugCapsProperties(); -#endif -}; + +using Config = ExecutionConfig; +// struct Config { +// Config(); + +// enum LPTransformsMode { +// Off, +// On, +// }; + +// enum DenormalsOptMode { +// DO_Keep, +// DO_Off, +// DO_On, +// }; + +// enum SnippetsMode { +// Enable, +// IgnoreCallback, +// Disable, +// }; + +// enum class ModelType { CNN, LLM, Unknown }; + +// bool collectPerfCounters = false; +// bool exclusiveAsyncRequests = false; +// SnippetsMode snippetsMode = SnippetsMode::Enable; +// std::string dumpToDot = {}; +// std::string device_id = {}; +// float fcSparseWeiDecompressionRate = 1.0f; +// uint64_t fcDynamicQuantizationGroupSize = 32; +// bool fcDynamicQuantizationGroupSizeSetExplicitly = false; +// bool kvCachePrecisionSetExplicitly = false; +// #if defined(OV_CPU_WITH_ACL) +// bool aclFastMath = false; +// #endif +// #if defined(OPENVINO_ARCH_X86_64) +// ov::element::Type kvCachePrecision = ov::element::u8; +// size_t rtCacheCapacity = 5000ul; +// #else +// ov::element::Type kvCachePrecision = ov::element::f16; +// // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives +// size_t rtCacheCapacity = 0ul; +// #endif +// ov::threading::IStreamsExecutor::Config streamExecutorConfig; +// int streams = 1; +// bool streamsChanged = false; +// int threads = 0; +// int threadsPerStream = 0; +// ov::threading::IStreamsExecutor::ThreadBindingType threadBindingType = +// ov::threading::IStreamsExecutor::ThreadBindingType::NONE; +// ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// std::vector> streamsRankTable; +// bool changedHintPerfMode = false; +// ov::log::Level logLevel = ov::log::Level::NO; +// uint32_t hintNumRequests = 0; +// bool enableCpuPinning = true; +// bool changedCpuPinning = false; +// ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; +// std::set modelDistributionPolicy = {}; +// int streamsRankLevel = 1; +// int numSubStreams = 0; +// bool enableNodeSplit = false; +// bool enableHyperThreading = true; +// bool changedHyperThreading = false; +// #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +// LPTransformsMode lpTransformsMode = LPTransformsMode::On; +// #else +// // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
+// LPTransformsMode lpTransformsMode = LPTransformsMode::Off; +// #endif +// // default inference precision +// ov::element::Type inferencePrecision = ov::element::f32; +// bool inferencePrecisionSetExplicitly = false; +// ov::hint::ExecutionMode executionMode = ov::hint::ExecutionMode::PERFORMANCE; + +// DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; + +// // The denormals-are-zeros flag was introduced in the Pentium 4 and Intel Xeon processor +// // In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag (bit 6) +// // is reserved. +// bool DAZOn = false; + +// void readProperties(const ov::AnyMap& config, const ModelType modelType = ModelType::Unknown); + +// void updateProperties(); + +// void applyRtInfo(const std::shared_ptr& model); + +// std::map _config; + +// int modelPreferThreads = -1; +// ModelType modelType = ModelType::Unknown; +// std::function cacheEncrypt; +// std::function cacheDecrypt; + +// #ifdef CPU_DEBUG_CAPS +// DebugCapsConfig debugCaps; +// void applyDebugCapsProperties(); +// #endif +// }; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/config_new.cpp b/src/plugins/intel_cpu/src/config_new.cpp new file mode 100644 index 00000000000000..d205631eb00df1 --- /dev/null +++ b/src/plugins/intel_cpu/src/config_new.cpp @@ -0,0 +1,659 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "config_new.hpp" +#include "utils/precision_support.h" +#include "utils/codec_xor.hpp" + +#include "cpu/x64/cpu_isa_traits.hpp" + +// #include +// #include +// #include + +// #include "cpu/x64/cpu_isa_traits.hpp" +// #include "openvino/core/parallel.hpp" +// #include "openvino/core/type/element_type_traits.hpp" +// #include "openvino/runtime/intel_cpu/properties.hpp" +// #include "openvino/runtime/internal_properties.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "utils/cpu_utils.hpp" +// #include "utils/debug_capabilities.h" +// #include "utils/precision_support.h" + +namespace ov { +namespace intel_cpu { + +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+
+    set_default_values();
+}
+
+ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() {
+    m_user_properties = other.m_user_properties;
+    m_is_finalized = false;  // copy is not automatically finalized
+    for (const auto& kv : other.m_options_map) {
+        m_options_map.at(kv.first)->set_any(kv.second->get_any());
+    }
+
+    streamExecutorConfig = other.streamExecutorConfig;
+    modelPreferThreads = other.modelPreferThreads;
+    modelType = other.modelType;
+    DAZOn = other.DAZOn;
+    streamsRankTable = other.streamsRankTable;
+    streamsRankLevel = other.streamsRankLevel;
+    numSubStreams = other.numSubStreams;
+    enableNodeSplit = other.enableNodeSplit;
+}
+
+ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) {
+    m_user_properties = other.m_user_properties;
+    m_is_finalized = false;  // copy is not automatically finalized
+    for (const auto& kv : other.m_options_map) {
+        m_options_map.at(kv.first)->set_any(kv.second->get_any());
+    }
+
+    streamExecutorConfig = other.streamExecutorConfig;
+    modelPreferThreads = other.modelPreferThreads;
+    modelType = other.modelType;
+    DAZOn = other.DAZOn;
+    streamsRankTable = other.streamsRankTable;
+    streamsRankLevel = other.streamsRankLevel;
+    numSubStreams = other.numSubStreams;
+    enableNodeSplit = other.enableNodeSplit;
+
+    return *this;
+}
+
+const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const {
+    static ov::PluginConfig::OptionsDesc help_map {
+        #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__)
+        #include "options.inl"
+        #undef OV_CONFIG_OPTION
+    };
+    return help_map;
+}
+
+void ExecutionConfig::apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
+    apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
+}
+
+void ExecutionConfig::finalize_impl(std::shared_ptr<IRemoteContext> context) {
+    // const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info();
+    apply_hints();
+
+    if (get_exclusive_async_requests()) {
+        m_num_streams = 1;
+    }
+
+#if defined(OV_CPU_WITH_SHL)
+    // TODO: multi-stream execution is unsafe when SHL is used:
+    // The library uses global static variables as flags and counters.
+    m_num_streams = 1;
+#endif
+
+    if (!m_cache_encryption_callbacks.value.encrypt || !m_cache_encryption_callbacks.value.decrypt) {
+        m_cache_encryption_callbacks.value.encrypt = codec_xor_str;
+        m_cache_encryption_callbacks.value.decrypt = codec_xor_str;
+    }
+}
+
+void ExecutionConfig::set_default_values() {
+#if defined(OPENVINO_ARCH_X86_64)
+    m_cpu_runtime_cache_capacity = 5000ul;
+#else
+    // TODO: Executor cache may lead to incorrect behavior on oneDNN ACL primitives
+    // TODO: Verify on RISC-V platforms
+    m_cpu_runtime_cache_capacity = 0ul;
+#endif
+
+#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
+    m_lp_transforms_mode = true;
+#else
+    // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode.
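+    // m_lp_transforms_mode gates the low-precision (INT8) transformation pipeline; it is assumed
+    // here to be a plain boolean option replacing the legacy LPTransformsMode::On/Off enum.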
+ m_lp_transforms_mode = false; +#endif +} + +void ExecutionConfig::apply_hints() { + apply_execution_hints(); + apply_performance_hints(); +} + +void ExecutionConfig::apply_execution_hints() { + if (get_execution_mode() == ov::hint::ExecutionMode::PERFORMANCE) { + if (!is_set_by_user(ov::hint::inference_precision)) { +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + m_inference_precision = ov::element::f16; +#else + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) + m_inference_precision = ov::element::bf16; +#endif + } + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size)) { + m_dynamic_quantization_group_size = 32; + } + if (!is_set_by_user(ov::hint::kv_cache_precision)) { +#if defined(OPENVINO_ARCH_X86_64) + m_kv_cache_precision = ov::element::u8; +#else + m_kv_cache_precision = ov::element::f16; +#endif + } +#if defined(OV_CPU_WITH_ACL) + if (!is_set_by_user(ov::intel_cpu::acl_fast_math)) { + m_acl_fast_math = true; + } +#endif + } + + if (get_execution_mode() == ov::hint::ExecutionMode::ACCURACY) { + if (!is_set_by_user(ov::hint::inference_precision)) { + m_inference_precision = ov::element::undefined; + } + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size)) { + m_dynamic_quantization_group_size = 0; + } + if (!is_set_by_user(ov::hint::kv_cache_precision)) { + m_kv_cache_precision = ov::element::f32; + } +#if defined(OV_CPU_WITH_ACL) + if (!is_set_by_user(ov::intel_cpu::acl_fast_math)) { + m_acl_fast_math = false; + } +#endif + } + + if (!hasHardwareSupport(m_inference_precision)) { + m_inference_precision = ov::element::f32; + } +} + +void ExecutionConfig::apply_performance_hints() { + // if (is_set_by_user(ov::hint::performance_mode)) { + // const auto mode = get_property(ov::hint::performance_mode); + // if (!is_set_by_user(ov::num_streams)) { + // if (mode == ov::hint::PerformanceMode::LATENCY) { + // set_property(ov::num_streams(1)); + // } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + // set_property(ov::num_streams(ov::streams::AUTO)); + // } + // } + // } + + // if (get_property(ov::num_streams) == ov::streams::AUTO) { + // int32_t n_streams = std::max(info.num_ccs, 2); + // set_property(ov::num_streams(n_streams)); + // } + + // if (get_property(ov::internal::exclusive_async_requests)) { + // set_property(ov::num_streams(1)); + // } + + // // Allow kernels reuse only for single-stream scenarios + // if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { + // if (get_property(ov::num_streams) != 1) { + // set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + // } + // } +} + + +// using namespace ov::threading; +// using namespace dnnl::impl::cpu::x64; + +// Config::Config() { +// // this is default mode +// #if defined(__APPLE__) || defined(_WIN32) +// threadBindingType = IStreamsExecutor::NONE; +// #else +// threadBindingType = IStreamsExecutor::CORES; +// #endif + +// // for the TBB code-path, additional configuration depending on the OS and CPU types +// #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) +// # if defined(__APPLE__) || defined(_WIN32) +// // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default +// auto numaNodes = get_available_numa_nodes(); +// if (numaNodes.size() > 1) { +// threadBindingType = IStreamsExecutor::NUMA; +// } else { +// threadBindingType = IStreamsExecutor::NONE; +// } +// # endif + +// if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { +// threadBindingType = IStreamsExecutor::HYBRID_AWARE; 
+// } +// #endif +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); + +// updateProperties(); +// } + +// #ifdef CPU_DEBUG_CAPS +// /** +// * Debug capabilities configuration has more priority than common one +// * Some of the debug capabilities also require to enable some of common +// * configuration properties +// */ +// void Config::applyDebugCapsProperties() { +// // always enable perf counters for verbose, performance summary and average counters +// if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) { +// collectPerfCounters = true; +// } +// } +// #endif + +// void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { +// const auto streamExecutorConfigKeys = +// streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); +// for (const auto& kvp : prop) { +// const auto& key = kvp.first; +// const auto& val = kvp.second; +// if (streamExecutorConfigKeys.end() != +// std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { +// streamExecutorConfig.set_property(key, val.as()); +// streams = streamExecutorConfig.get_streams(); +// threads = streamExecutorConfig.get_threads(); +// threadsPerStream = streamExecutorConfig.get_threads_per_stream(); +// if (key == ov::num_streams.name()) { +// ov::Any value = val.as(); +// auto streams_value = value.as(); +// if (streams_value == ov::streams::NUMA) { +// modelDistributionPolicy = {}; +// hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// changedHintPerfMode = true; +// } else if (streams_value == ov::streams::AUTO) { +// hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; +// changedHintPerfMode = true; +// } else { +// streamsChanged = true; +// } +// } +// OPENVINO_SUPPRESS_DEPRECATED_START +// } else if (key == ov::affinity.name()) { +// try { +// changedCpuPinning = true; +// ov::Affinity affinity = val.as(); +// #if defined(__APPLE__) +// enableCpuPinning = false; +// threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE +// : IStreamsExecutor::ThreadBindingType::NUMA; +// #else +// enableCpuPinning = +// (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; +// switch (affinity) { +// case ov::Affinity::NONE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; +// break; +// case ov::Affinity::CORE: { +// threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; +// } break; +// case ov::Affinity::NUMA: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; +// break; +// case ov::Affinity::HYBRID_AWARE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; +// break; +// default: +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// #endif +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// OPENVINO_SUPPRESS_DEPRECATED_END +// } else if (key == ov::hint::performance_mode.name()) { +// try { +// hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". 
Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); +// } +// } else if (key == ov::log::level.name()) { +// try { +// logLevel = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// key, +// ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); +// } +// } else if (key == ov::hint::num_requests.name()) { +// try { +// ov::Any value = val.as(); +// int val_i = value.as(); +// if (val_i < 0) +// OPENVINO_THROW("invalid value."); +// hintNumRequests = static_cast(val_i); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::num_requests.name(), +// ". Expected only > 0."); +// } +// } else if (key == ov::hint::enable_cpu_pinning.name()) { +// try { +// enableCpuPinning = val.as(); +// changedCpuPinning = true; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::enable_cpu_pinning.name(), +// ". Expected only true/false."); +// } +// } else if (key == ov::hint::scheduling_core_type.name()) { +// try { +// schedulingCoreType = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::scheduling_core_type.name(), +// ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); +// } +// } else if (key == ov::hint::model_distribution_policy.name()) { +// auto error_info = [&]() { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::model_distribution_policy.name(), +// ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); +// }; + +// try { +// for (auto& row : val.as>()) { +// if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { +// error_info(); +// } +// } +// modelDistributionPolicy = val.as>(); +// } catch (ov::Exception&) { +// error_info(); +// } +// } else if (key == ov::hint::enable_hyper_threading.name()) { +// try { +// enableHyperThreading = val.as(); +// changedHyperThreading = true; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::enable_hyper_threading.name(), +// ". Expected only true/false."); +// } +// } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { +// float val_f = 0.0f; +// try { +// val_f = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::intel_cpu::sparse_weights_decompression_rate.name(), +// ". Expected only float numbers"); +// } +// if (val_f < 0.f || val_f > 1.f) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::intel_cpu::sparse_weights_decompression_rate.name(), +// ". Sparse rate must be in range [0.0f,1.0f]"); +// } else { +// fcSparseWeiDecompressionRate = val_f; +// } +// } else if (key == ov::hint::dynamic_quantization_group_size.name()) { +// try { +// fcDynamicQuantizationGroupSizeSetExplicitly = true; +// fcDynamicQuantizationGroupSize = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::hint::dynamic_quantization_group_size.name(), +// ". 
Expected only unsinged integer numbers"); +// } +// } else if (key == ov::enable_profiling.name()) { +// try { +// collectPerfCounters = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::enable_profiling.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::internal::exclusive_async_requests.name()) { +// try { +// exclusiveAsyncRequests = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::internal::exclusive_async_requests.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { +// try { +// lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// key, +// ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); +// } +// } else if (key == ov::device::id.name()) { +// device_id = val.as(); +// if (!device_id.empty()) { +// OPENVINO_THROW("CPU plugin supports only '' as device id"); +// } +// } else if (key == ov::hint::inference_precision.name()) { +// try { +// auto const prec = val.as(); +// inferencePrecisionSetExplicitly = true; +// if (prec == ov::element::bf16) { +// if (hasHardwareSupport(ov::element::bf16)) { +// inferencePrecision = ov::element::bf16; +// } +// } else if (prec == ov::element::f16) { +// if (hasHardwareSupport(ov::element::f16)) { +// inferencePrecision = ov::element::f16; +// } +// } else if (one_of(prec, element::f32, element::undefined)) { +// inferencePrecision = prec; +// } else { +// OPENVINO_THROW("invalid value"); +// } +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::hint::inference_precision.name(), +// ". Supported values: bf16, f16, f32, undefined"); +// } +// } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { +// int val_i = -1; +// try { +// ov::Any value = val.as(); +// val_i = value.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::cpu_runtime_cache_capacity.name(), +// ". Expected only integer numbers"); +// } +// // any negative value will be treated +// // as zero that means disabling the cache +// rtCacheCapacity = std::max(val_i, 0); +// } else if (ov::intel_cpu::denormals_optimization.name() == key) { +// try { +// denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; +// } catch (ov::Exception&) { +// denormalsOptMode = DenormalsOptMode::DO_Keep; +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::denormals_optimization.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::intel_cpu::snippets_mode.name()) { +// try { +// auto const mode = val.as(); +// if (mode == ov::intel_cpu::SnippetsMode::ENABLE) +// snippetsMode = SnippetsMode::Enable; +// else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) +// snippetsMode = SnippetsMode::IgnoreCallback; +// else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) +// snippetsMode = SnippetsMode::Disable; +// else +// OPENVINO_THROW("invalid value"); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::snippets_mode.name(), +// ". 
Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); +// } +// } else if (key == ov::hint::execution_mode.name()) { +// try { +// executionMode = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::execution_mode.name(), +// ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); +// } +// } else if (key == ov::hint::kv_cache_precision.name()) { +// try { +// kvCachePrecisionSetExplicitly = true; +// auto const prec = val.as(); +// if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { +// kvCachePrecision = prec; +// } else { +// OPENVINO_THROW("invalid value"); +// } +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::hint::kv_cache_precision.name(), +// ". Supported values: u8, bf16, f16, f32"); +// } +// } else if (key == ov::cache_encryption_callbacks.name()) { +// try { +// auto encryption_callbacks = val.as(); +// cacheEncrypt = encryption_callbacks.encrypt; +// cacheDecrypt = encryption_callbacks.decrypt; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); +// } +// } else if (key == ov::internal::caching_with_mmap.name()) { +// } else { +// OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); +// } +// } +// // apply execution mode after all the params are handled to prevent possible conflicts +// // when both execution_mode and inference_precision are specified +// if (!inferencePrecisionSetExplicitly) { +// if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// inferencePrecision = ov::element::f32; +// #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +// if (hasHardwareSupport(ov::element::f16)) { +// inferencePrecision = ov::element::f16; +// } +// #endif +// if (mayiuse(avx512_core_bf16)) +// inferencePrecision = ov::element::bf16; +// } else { +// inferencePrecision = ov::element::undefined; +// } +// } +// // enable ACL fast math in PERFORMANCE mode +// #if defined(OV_CPU_WITH_ACL) +// if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// aclFastMath = true; +// } +// #endif +// // disable dynamic quantization and kv quantization for best accuracy +// if (executionMode == ov::hint::ExecutionMode::ACCURACY) { +// if (!fcDynamicQuantizationGroupSizeSetExplicitly) { +// fcDynamicQuantizationGroupSize = 0; +// } +// if (!kvCachePrecisionSetExplicitly) { +// kvCachePrecision = ov::element::f32; +// } +// } + +// if (!prop.empty()) +// _config.clear(); + +// if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams +// streams = 1; +// streamsChanged = true; +// } + +// #if defined(OV_CPU_WITH_SHL) +// // TODO: multi-stream execution is unsafe when SHL is used: +// // The library uses global static variables as flags and counters. 
+// streams = 1; +// streamsChanged = true; +// #endif + +// this->modelType = modelType; + +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); +// updateProperties(); +// } + +// void Config::updateProperties() { +// if (!_config.empty()) +// return; + +// if (collectPerfCounters == true) +// _config.insert({ov::enable_profiling.name(), "YES"}); +// else +// _config.insert({ov::enable_profiling.name(), "NO"}); +// if (exclusiveAsyncRequests == true) +// _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); +// else +// _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); + +// _config.insert({ov::device::id.name(), device_id}); + +// _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); +// _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); +// } + +// void Config::applyRtInfo(const std::shared_ptr& model) { +// // if user sets explicitly, it will be higher priority than rt_info +// if (!kvCachePrecisionSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { +// this->kvCachePrecision = +// model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); +// } +// if (!fcDynamicQuantizationGroupSizeSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { +// this->fcDynamicQuantizationGroupSize = +// model->get_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()}); +// } +// } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/config_new.hpp b/src/plugins/intel_cpu/src/config_new.hpp new file mode 100644 index 00000000000000..ff6e717fd5cf32 --- /dev/null +++ b/src/plugins/intel_cpu/src/config_new.hpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +// #include +// #include +// #include + +// #include "openvino/core/type/element_type.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "openvino/runtime/threading/istreams_executor.hpp" +// #include "openvino/util/common_util.hpp" +// #include "utils/debug_caps_config.h" + +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/intel_cpu/properties.hpp" +#include "internal_properties.hpp" + +#include "utils/general_utils.h" + +namespace ov { +namespace intel_cpu { + +struct ExecutionConfig : public ov::PluginConfig { + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); + + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+
+    void finalize_impl(std::shared_ptr<IRemoteContext> context) override;
+    void apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) override;
+
+    // TODO: move to GraphContext
+    ov::threading::IStreamsExecutor::Config streamExecutorConfig;
+    // TODO: make local for streams calculation logic
+    int modelPreferThreads = -1;
+    // TODO: move to GraphContext
+    enum class ModelType { CNN, LLM, Unknown };
+    ModelType modelType = ModelType::Unknown;
+
+    bool DAZOn = false;
+
+    std::vector<std::vector<int>> streamsRankTable;
+    int streamsRankLevel = 1;
+    int numSubStreams = 0;
+    bool enableNodeSplit = false;
+
+private:
+    void set_default_values();
+    void apply_user_properties();
+    void apply_hints();
+    void apply_execution_hints();
+    void apply_performance_hints();
+    const ov::PluginConfig::OptionsDesc& get_options_desc() const override;
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
index 3af6a52d5f3342..3b34b7000d5a66 100644
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -672,39 +672,48 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
                                                    Config& config,
                                                    std::vector<std::vector<int>>& proc_type_table,
                                                    int preferred_nthreads_per_stream) {
-    int model_prefer_threads = preferred_nthreads_per_stream;
-    proc_type_table = apply_scheduling_core_type(config.schedulingCoreType, proc_type_table);
+    auto threadsPerStream = config.streamExecutorConfig.get_threads_per_stream();
 
-    proc_type_table = apply_hyper_threading(config.enableHyperThreading,
-                                            config.changedHyperThreading,
-                                            ov::util::to_string(config.hintPerfMode),
+    int model_prefer_threads = preferred_nthreads_per_stream;
+    auto core_type = config.get_scheduling_core_type();
+    proc_type_table = apply_scheduling_core_type(core_type, proc_type_table);
+    config.set_property(ov::hint::scheduling_core_type(core_type));
+
+    auto enable_hyper_threading = config.get_enable_hyper_threading();
+    proc_type_table = apply_hyper_threading(enable_hyper_threading,
+                                            config.is_set_by_user(ov::hint::enable_hyper_threading),
+                                            ov::util::to_string(config.get_performance_mode()),
                                             proc_type_table);
+    config.set_property(ov::hint::enable_hyper_threading(enable_hyper_threading));
+
     if (-1 == preferred_nthreads_per_stream) {
         model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, model, config);
     }
 
-    auto streams_info_table = get_streams_info_table(config.streams,
-                                                     config.streamsChanged,
-                                                     config.threads,
-                                                     config.hintNumRequests,
+    auto streams_info_table = get_streams_info_table(config.get_num_streams(),
+                                                     config.is_set_by_user(ov::num_streams),
+                                                     config.get_inference_num_threads(),
+                                                     config.get_num_requests(),
                                                      model_prefer_threads,
                                                      input_current_socket_id,
-                                                     ov::util::to_string(config.hintPerfMode),
-                                                     config.modelDistributionPolicy,
+                                                     ov::util::to_string(config.get_performance_mode()),
+                                                     config.get_model_distribution_policy(),
                                                      proc_type_table);
     // streams_info_table = {{1, 1, 56, 1, 1}, {-1, 1, 28, 1, 1}, {-1, 1, 28, 0, 0}};
-    if (config.modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) !=
-        config.modelDistributionPolicy.end()) {
+    auto modelDistributionPolicy = config.get_model_distribution_policy();
+    if
(modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) { config.streamsRankTable = get_streams_rank_table(streams_info_table, config.streamsRankLevel, config.numSubStreams); } + auto enable_cpu_pinning = config.get_enable_cpu_pinning(); auto cpu_pinning = - get_cpu_pinning(config.enableCpuPinning, config.changedCpuPinning, proc_type_table, streams_info_table); + get_cpu_pinning(enable_cpu_pinning, config.is_set_by_user(ov::hint::enable_cpu_pinning), proc_type_table, streams_info_table); + config.set_property(ov::hint::enable_cpu_pinning(cpu_pinning)); config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor", - config.streams, - config.threadsPerStream, + config.get_num_streams(), + threadsPerStream, ov::hint::SchedulingCoreType::ANY_CORE, false, cpu_pinning, diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index aab78a4d5f15bd..79ce3b61d3685c 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -55,8 +55,8 @@ namespace ov { namespace intel_cpu { Graph::~Graph() { - CPU_DEBUG_CAP_ENABLE(summary_perf(*this)); - CPU_DEBUG_CAP_ENABLE(average_counters(*this)); + CPU_DEBUG_CAP_ENABLE(dump_summary_perf(*this)); + CPU_DEBUG_CAP_ENABLE(dump_average_counters(*this)); } template @@ -1409,9 +1409,9 @@ class UpdateNodes : public UpdateNodesBase { /* group all the profiling macros into a single one * to avoid cluttering a core logic */ #define VERBOSE_PERF_DUMP_ITT_DEBUG_LOG(ittScope, node, config) \ - VERBOSE(node, config.debugCaps.verbose); \ - PERF(node, config.collectPerfCounters); \ - DUMP(node, config.debugCaps, infer_count); \ + VERBOSE(node, config.get_verbose()); \ + PERF(node, config.get_enable_profiling()); \ + DUMP(node, config, infer_count); \ OV_ITT_SCOPED_TASK(ittScope, node->profiling.execute); \ DEBUG_LOG(*node); @@ -1452,7 +1452,7 @@ static int GetNumaNodeId(const GraphContext::CPtr& context) { int numaNodeId = -1; #if defined(__x86_64__) && defined(__linux__) if ((context->getCPUStreamExecutor()) && - (context->getConfig().hintPerfMode == ov::hint::PerformanceMode::LATENCY)) { + (context->getConfig().get_performance_mode() == ov::hint::PerformanceMode::LATENCY)) { numaNodeId = context->getCPUStreamExecutor()->get_numa_node_id(); } #endif @@ -1788,7 +1788,7 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo void Graph::EnforceInferencePrecision() { CPU_DEBUG_CAP_ENABLE(EnforceInferPrcDebug inferPrecDebug); - const auto inferPrec = getConfig().inferencePrecision; + const auto inferPrec = getConfig().get_inference_precision(); if (one_of(inferPrec, element::f32, element::undefined, ov::element::f16)) return; // nothing to do, only precision reduction is currently allowed diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index 462cdab2a9b5c0..2707f92f05c9db 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -22,7 +22,7 @@ GraphContext::GraphContext(const Config& config, subMemoryManager(sub_memory_manager), memoryStatesRegister(std::make_shared()), networkMemoryControl(std::make_shared()) { - rtParamsCache = std::make_shared(config.rtCacheCapacity); + rtParamsCache = std::make_shared(config.get_cpu_runtime_cache_capacity()); // primitive/executors can be shared across sub-stream // but scratch pad cannot be shared. 
numNumaNodes = 1; diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index ffd58fdb162899..ef516e47ba0bcc 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -218,7 +218,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph) { #ifdef CPU_DEBUG_CAPS void serialize(const Graph& graph) { - const std::string& path = graph.getConfig().debugCaps.execGraphPath; + const std::string& path = graph.getConfig().get_exec_graph_path(); if (path.empty()) return; @@ -264,11 +264,11 @@ void serializeToCout(const Graph& graph) { } } -void summary_perf(const Graph& graph) { +void dump_summary_perf(const Graph& graph) { if (!graph.getGraphContext()) { return; } - const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf; + const std::string& summaryPerf = graph.getConfig().get_summary_perf(); if (summaryPerf.empty() || !std::stoi(summaryPerf)) return; @@ -349,7 +349,7 @@ void summary_perf(const Graph& graph) { } } -void average_counters(const Graph& graph) { +void dump_average_counters(const Graph& graph) { /** * @todo improve logic for a graph with inner graphs: * - collect counters only for the outer graph if full path is specified @@ -358,7 +358,7 @@ void average_counters(const Graph& graph) { * For example: 0_MyModel.csv */ - const std::string& path = graph.getConfig().debugCaps.averageCountersPath; + const std::string& path = graph.getConfig().get_average_counters(); if (path.empty()) return; diff --git a/src/plugins/intel_cpu/src/graph_dumper.h b/src/plugins/intel_cpu/src/graph_dumper.h index 40af2fd44c61e6..dd97346aad1792 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.h +++ b/src/plugins/intel_cpu/src/graph_dumper.h @@ -14,8 +14,8 @@ namespace intel_cpu { std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph); #ifdef CPU_DEBUG_CAPS void serialize(const Graph& graph); -void summary_perf(const Graph& graph); -void average_counters(const Graph& graph); +void dump_summary_perf(const Graph& graph); +void dump_average_counters(const Graph& graph); #endif // CPU_DEBUG_CAPS } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp index 7f3feb59779bd4..12e8c84b79af59 100644 --- a/src/plugins/intel_cpu/src/internal_properties.hpp +++ b/src/plugins/intel_cpu/src/internal_properties.hpp @@ -68,5 +68,27 @@ inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) { */ static constexpr Property snippets_mode{"SNIPPETS_MODE"}; +/** + * @brief Enables fast-math mode for ARM Compute Library (ACL). 
+ */
+static constexpr Property<bool> acl_fast_math{"ACL_FAST_MATH"};
+
+enum class BlobDumpFormat {
+    BIN,
+    TEXT,
+};
+
+static constexpr Property<std::string> verbose{"VERBOSE"};
+static constexpr Property<std::string> exec_graph_path{"EXEC_GRAPH_PATH"};
+static constexpr Property<std::string> average_counters{"AVERAGE_COUNTERS"};
+static constexpr Property<std::string> blob_dump_dir{"BLOB_DUMP_DIR"};
+static constexpr Property<BlobDumpFormat> blob_dump_format{"BLOB_DUMP_FORMAT"};
+static constexpr Property<std::string> blob_dump_node_exec_id{"BLOB_DUMP_NODE_EXEC_ID"};
+static constexpr Property<std::string> blob_dump_node_ports{"BLOB_DUMP_NODE_PORTS"};
+static constexpr Property<std::string> blob_dump_node_type{"BLOB_DUMP_NODE_TYPE"};
+static constexpr Property<std::string> blob_dump_node_name{"BLOB_DUMP_NODE_NAME"};
+static constexpr Property<std::string> summary_perf{"SUMMARY_PERF"};
+
 }  // namespace intel_cpu
 }  // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 2df6c0ae7522cc..46977f9a063904 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -470,8 +470,8 @@ void FullyConnected::initSupportedPrimitiveDescriptors() {
     attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
                                                         getOriginalInputPrecisionAtPort(DATA),
-                                                        context->getConfig().fcSparseWeiDecompressionRate);
-    attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize;
+                                                        context->getConfig().get_sparse_weights_decompression_rate());
+    attrs.dynamicQuantizationGroupSize = context->getConfig().get_dynamic_quantization_group_size();
     attrs.modelType = context->getConfig().modelType;
 
     postOps = getPostOps(fusedWith);
diff --git a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
index b475a602c3cd1a..a3c97a5d0862d1 100644
--- a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
+++ b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
@@ -503,7 +503,7 @@ LLMMLP::LLMMLP(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
     : Node(op, context, NgraphShapeInferFactory(op)) {
     std::string errorMessage;
     const auto& config = context->getConfig();
-    if (!isSupportedOperation(op, errorMessage, config.fcDynamicQuantizationGroupSize)) {
+    if (!isSupportedOperation(op, errorMessage, config.get_dynamic_quantization_group_size())) {
         OPENVINO_THROW("CPU: " + errorMessage);
     }
     const auto node_mlp = std::dynamic_pointer_cast<const LLMMLPNode>(op);
diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp
index 5a0bd7a1e3dff1..703260b9c4ed26 100644
--- a/src/plugins/intel_cpu/src/nodes/memory.cpp
+++ b/src/plugins/intel_cpu/src/nodes/memory.cpp
@@ -13,6 +13,7 @@
 #include "scaled_attn.h"
 #include "shape_inference/shape_inference_pass_through.hpp"
 #include "utils/general_utils.h"
+#include "openvino/util/common_util.hpp"
 
 using namespace dnnl;
 
diff --git a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
index ce7bfae07591d6..f07456aeffe389 100644
--- a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
+++ b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
@@ -343,7 +343,7 @@ QKVProjection::QKVProjection(const std::shared_ptr<ov::Node>& op, const GraphCon
     if (concurrency == 0)
         concurrency = parallel_get_max_threads();
 
-    if (!isSupportedOperation(op, errorMessage, concurrency, config.fcDynamicQuantizationGroupSize)) {
+    if (!isSupportedOperation(op, errorMessage, concurrency, config.get_dynamic_quantization_group_size())) {
         OPENVINO_THROW("CPU: " + errorMessage);
     }
const auto node = std::dynamic_pointer_cast(op); diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index 7fe3fc8dc5045d..f1eaad0c53be2a 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -1834,7 +1834,7 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M ov::element::Type ScaledDotProductAttention::getKVCachePrecision() { ov::element::Type kvcache_precision; auto rtPrecision = getRuntimePrecision(); - auto kvCachePrecisionHint = context->getConfig().kvCachePrecision; + auto kvCachePrecisionHint = context->getConfig().get_kv_cache_precision(); bool enableKVCacheFP16 = m_config.config.fuse_concat && mayiuse(cpu_isa_t::avx2) && rtPrecision != ov::element::bf16 && kvCachePrecisionHint == ov::element::f16; kvcache_precision = enableKVCacheFP16 ? ov::element::f16 : rtPrecision; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 2b0c7b55fb043d..f2ff8ad03ff5a1 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -460,9 +460,9 @@ void Subgraph::initSupportedPrimitiveDescriptors() { const auto originalInputPrecision = getOriginalInputPrecisionAtPort(i); const auto precision = ((originalInputPrecision == ov::element::f32) && - one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) && + one_of(context->getConfig().get_inference_precision(), ov::element::bf16, ov::element::f16) && subgraph_attrs->snippet->has_domain_sensitive_ops()) - ? context->getConfig().inferencePrecision + ? context->getConfig().get_inference_precision() : originalInputPrecision; if (supportedPrecisions.count(precision) == 0) OPENVINO_THROW("Subgraph node with name `", getName(), "` doesn't support ", precision, " precision."); @@ -654,7 +654,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::snippets::pass::Canonicalization, ov::snippets::pass::AnalyzeBroadcastableInputs, broadcastable_inputs); - if (one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) && + if (one_of(context->getConfig().get_inference_precision(), ov::element::bf16, ov::element::f16) && subgraph_attrs->snippet->has_domain_sensitive_ops()) { // enforce BF16 precisions to supported operations // MatMul has to be decomposed to Brgemm operations before enforcement @@ -664,7 +664,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::snippets::pass::MatMulToBrgemm, pass::EnforcePrecision, element::f32, - context->getConfig().inferencePrecision); + context->getConfig().get_inference_precision()); } SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, ov::snippets::pass::PropagatePrecision, diff --git a/src/plugins/intel_cpu/src/options.inl b/src/plugins/intel_cpu/src/options.inl new file mode 100644 index 00000000000000..a4028b2269d378 --- /dev/null +++ b/src/plugins/intel_cpu/src/options.inl @@ -0,0 +1,76 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description + +// OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") // ??? 
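+
+// Illustrative sketch only (an assumption about the macro expansion, not its actual
+// definition): when this .inl file is included into the Config class body, an entry like
+//
+//     OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f32, validator, "...")
+//
+// is expected to produce, roughly:
+//
+//     ConfigOption<ov::element::Type> m_inference_precision{ov::element::f32, validator};
+//     ov::element::Type get_inference_precision() const { return m_inference_precision.value; }
+//
+// which is what the config.get_<name>() accessors used throughout this patch rely on;
+// the exact member/getter naming is inferred from those call sites.
+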
+OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f32,
+    [](ov::element::Type val) { return one_of(val, ov::element::f32, ov::element::bf16, ov::element::f16, ov::element::undefined); },
+    "Model floating-point inference precision. Supported values: f32, bf16, f16, undefined")
+OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY,
+    "High-level hint that defines the target model inference mode. It may impact the number of streams, auto batching, etc.")
+OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE,
+    "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
+
+OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Defines the number of streams to be used for inference")
+OV_CONFIG_RELEASE_OPTION(ov, inference_num_threads, 0, "Defines the maximum number of threads that can be used for inference tasks")
+OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Provides the number of requests populated by the application") // TODO: Do we need validator?
+OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "")
+
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_hyper_threading, false, "Defines if hyper-threading is used during inference")
+OV_CONFIG_RELEASE_OPTION(ov::hint, scheduling_core_type, ov::hint::SchedulingCoreType::ANY_CORE, "Defines the CPU core type which can be used during inference")
+
+OV_CONFIG_RELEASE_OPTION(ov::hint, model_distribution_policy, {},
+    [](std::set<ov::hint::ModelDistributionPolicy> val) {
+        for (auto& row : val) {
+            if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL))
+                return false;
+        }
+        return true;
+    },
+    "Defines the model distribution policy for inference with multiple sockets/devices. Supported values: TENSOR_PARALLEL")
+
+
+OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0,
+    "Defines the group size for dynamic quantization optimization. Supported values: [0, UINT64_MAX], where 0 disables DQ and UINT64_MAX enables per-tensor DQ")
+OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::f32,
+    [](ov::element::Type val) { return one_of(val, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8); },
+    "Specifies the precision for kv cache compression. Supported values: f32, bf16, f16, u8")
+
+OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, cpu_runtime_cache_capacity, 0,
+    [](int val) { return val >= 0; },
+    "Defines how many records can be stored in the CPU runtime parameters cache per CPU runtime parameter type per stream. Supported values: [0, INT32_MAX]")
+OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, sparse_weights_decompression_rate, 1.0f,
+    [](float val) { return val >= 0.f && val <= 1.f; },
+    "Defines the threshold for sparse weights decompression feature activation (1.0 means the feature is disabled). Supported values: [0.0f, 1.0f]")
+OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, denormals_optimization, nullptr,
+    "Defines whether to perform denormals optimization (enables FTZ and DAZ)")
+
+OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, lp_transforms_mode, false, "Defines if Low Precision Transformations (LPT) should be enabled")
+OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, snippets_mode, SnippetsMode::ENABLE,
+    [](ov::intel_cpu::SnippetsMode val) { return one_of(val, ov::intel_cpu::SnippetsMode::ENABLE, ov::intel_cpu::SnippetsMode::DISABLE, ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK); },
+    "Defines the Snippets code generation pipeline mode. Supported values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK")
+
+OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enables profiling for the plugin")
+OV_CONFIG_RELEASE_OPTION(ov::log, level, ov::log::Level::NO, "Defines the log level")
+OV_CONFIG_RELEASE_OPTION(ov::device, id, "", "ID of the current device")
+
+OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
+OV_CONFIG_RELEASE_OPTION(ov::internal, caching_with_mmap, true, "Defines if caching with mmap should be enabled")
+
+#if defined(OV_CPU_WITH_ACL)
+    OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, acl_fast_math, false, "Defines if ACL fast-math mode should be enabled")
+#endif
+
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, verbose, "0", "Enables logging for debugging purposes.")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, exec_graph_path, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, average_counters, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_dir, "cpu_dump", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_format, BlobDumpFormat::TEXT, "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_exec_id, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_ports, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_type, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_name, "", "")
+OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, summary_perf, "", "")
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index db55c728df725e..f587712a9fe3c0 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -135,6 +135,7 @@ Plugin::Plugin() : deviceFullName(getDeviceFullName()), specialSetup(new CPUSpec
     auto& ov_version = ov::get_openvino_version();
     m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber);
     m_msg_manager = ov::threading::message_manager();
+    m_remote_context = std::make_shared<RemoteContextImpl>(get_device_name());
 }
 Plugin::~Plugin() {
@@ -144,24 +145,35 @@ Plugin::~Plugin() {
     executor_manager()->clear("CPUCallbackExecutor");
 }
-static bool streamsSet(const ov::AnyMap& config) {
-    return config.count(ov::num_streams.name());
+namespace {
+
+ov::RTMap get_rt_info(const ov::Model& model) {
+    ov::RTMap rt_info;
+    if (model.has_rt_info("runtime_options"))
+        rt_info = model.get_rt_info<ov::AnyMap>("runtime_options");
+
+    if (model.has_rt_info("__weights_path")) {
+        rt_info[ov::weights_path.name()] = model.get_rt_info<std::string>("__weights_path");
+    }
+    return rt_info;
 }
+
+} // namespace
+
 void Plugin::get_performance_streams(Config& config, const std::shared_ptr<ov::Model>& model) const {
-    int streams_set = config.streams;
+    int streams_set = config.get_num_streams();
     int streams;
-    if (config.streamsChanged) {
+    if (config.is_set_by_user(ov::num_streams)) {
         streams = streams_set;
-    } else if (config.hintPerfMode ==
ov::hint::PerformanceMode::LATENCY) { + } else if (config.get_performance_mode() == ov::hint::PerformanceMode::LATENCY) { streams = 1; - } else if (config.hintPerfMode == ov::hint::PerformanceMode::THROUGHPUT) { + } else if (config.get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) { streams = 0; } else { streams = streams_set == 1 ? 0 : streams_set; } - if (!((0 == streams_set) && config.streamsChanged)) { + if (!((0 == streams_set) && config.is_set_by_user(ov::num_streams))) { get_num_streams(streams, model, config); } else { config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor", streams}; @@ -169,6 +181,11 @@ void Plugin::get_performance_streams(Config& config, const std::shared_ptr& model, bool imported) const { + + conf.streamExecutorConfig.set_property(ov::num_streams.name(), conf.get_property(ov::num_streams.name()).as()); + conf.streamExecutorConfig.set_property(ov::inference_num_threads.name(), conf.get_property(ov::inference_num_threads.name()).as()); + // conf.streamExecutorConfig.set_property(ov::threads_per_stream.name(), conf.get_property(ov::threads_per_stream.name())); + const auto model_prefer_name = std::string("MODEL_PREFER_THREADS"); if (imported && model->has_rt_info("intel_cpu_hints_config")) { // load model_prefer_threads from cache @@ -207,7 +224,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, - const ov::AnyMap& orig_config) const { + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Plugin::compile_model"); CREATE_DEBUG_TIMER(debugLoadTimer); @@ -239,27 +256,26 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } } - auto config = orig_config; + // auto config = orig_config; const std::shared_ptr cloned_model = model->clone(); - Config::ModelType modelType = getModelType(model); DEBUG_LOG(PrintableModel(*cloned_model, "org_")); // update the props after the perf mode translated to configs // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? 
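    // For illustration only (assumed usage, not part of this diff): the get_rt_info()
    // helper above lets a model carry per-model option defaults. A model built in code
    // could hint the kv-cache precision via ov::Model::set_rt_info, e.g.:
    //
    //     model->set_rt_info(ov::element::u8, "runtime_options", ov::hint::kv_cache_precision.name());
    //
    // Config::finalize() then applies such rt_info values, but only for options the user
    // has not set explicitly (see the is_set_by_user() guard in plugin_config.hpp later
    // in this series).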
- Config conf = engConfig; - conf.applyRtInfo(cloned_model); - conf.readProperties(config, modelType); + // Config conf = engConfig; + // conf.applyRtInfo(cloned_model); + // conf.readProperties(config, modelType); - Transformations transformations(cloned_model, conf); + Config config = m_plugin_config; + config.set_property(properties, OptionVisibility::RELEASE); + config.modelType = getModelType(model); - transformations.UpToLpt(); + Transformations transformations(cloned_model, config); - calculate_streams(conf, cloned_model); + transformations.UpToLpt(); - if (!conf.cacheEncrypt || !conf.cacheDecrypt) { - conf.cacheEncrypt = codec_xor_str; - conf.cacheDecrypt = codec_xor_str; - } + calculate_streams(config, cloned_model); + config.finalize(get_default_context(), get_rt_info(*model)); transformations.PostLpt(); transformations.Snippets(); @@ -288,85 +304,90 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } // SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE - static Xbyak::util::Cpu cpu; - if (cpu.has(Xbyak::util::Cpu::tSSE)) { - if (conf.denormalsOptMode == Config::DenormalsOptMode::DO_On) { + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) { + auto denormals_optimization = config.get_denormals_optimization(); + if (denormals_optimization && *denormals_optimization == true) { flush_to_zero(true); - conf.DAZOn = denormals_as_zero(true); - } else if (conf.denormalsOptMode == Config::DenormalsOptMode::DO_Off) { + config.DAZOn = denormals_as_zero(true); + } else if (denormals_optimization && *denormals_optimization == false) { flush_to_zero(false); denormals_as_zero(false); } } - return std::make_shared(cloned_model, shared_from_this(), conf, false); + + return std::make_shared(cloned_model, shared_from_this(), config, false); } void Plugin::set_property(const ov::AnyMap& config) { - // @todo after Legacy configuration is dropped, use some wrapper class to keep both the property and - // "ifSetExplicitly" flag - streamsExplicitlySetForEngine = streamsSet(config); - - engConfig.readProperties(config); + m_plugin_config.set_property(config, OptionVisibility::RELEASE); } ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { - if (name == ov::optimal_number_of_infer_requests) { - const auto streams = engConfig.streamExecutorConfig.get_streams(); - return decltype(ov::optimal_number_of_infer_requests)::value_type( - streams); // ov::optimal_number_of_infer_requests has no negative values - } else if (name == ov::num_streams) { - const auto streams = engConfig.streamExecutorConfig.get_streams(); - return decltype(ov::num_streams)::value_type( - streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (name == ov::affinity) { - const auto affinity = engConfig.threadBindingType; - switch (affinity) { - case IStreamsExecutor::ThreadBindingType::NONE: - return ov::Affinity::NONE; - case IStreamsExecutor::ThreadBindingType::CORES: - return ov::Affinity::CORE; - case IStreamsExecutor::ThreadBindingType::NUMA: - return ov::Affinity::NUMA; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return ov::Affinity::HYBRID_AWARE; - } - return ov::Affinity::NONE; - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (name == ov::device::id.name()) { - return decltype(ov::device::id)::value_type{engConfig.device_id}; - } else if (name == ov::inference_num_threads) { - const auto threads = engConfig.streamExecutorConfig.get_threads(); - return 
decltype(ov::inference_num_threads)::value_type(threads); - } else if (name == ov::enable_profiling.name()) { - const bool perfCount = engConfig.collectPerfCounters; - return decltype(ov::enable_profiling)::value_type(perfCount); - } else if (name == ov::hint::inference_precision) { - return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision); - } else if (name == ov::hint::performance_mode) { - return engConfig.hintPerfMode; - } else if (name == ov::hint::enable_cpu_pinning) { - const bool pin_value = engConfig.enableCpuPinning; - return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value); - } else if (name == ov::hint::scheduling_core_type) { - const auto core_type = engConfig.schedulingCoreType; - return core_type; - } else if (name == ov::hint::model_distribution_policy) { - const auto& distribution_policy = engConfig.modelDistributionPolicy; - return distribution_policy; - } else if (name == ov::hint::enable_hyper_threading) { - const bool ht_value = engConfig.enableHyperThreading; - return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); - } else if (name == ov::hint::num_requests) { - return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); - } else if (name == ov::hint::execution_mode) { - return engConfig.executionMode; - } else if (name == ov::internal::compiled_model_runtime_properties.name()) { + // if (name == ov::optimal_number_of_infer_requests) { + // const auto streams = engConfig.streamExecutorConfig.get_streams(); + // return decltype(ov::optimal_number_of_infer_requests)::value_type( + // streams); // ov::optimal_number_of_infer_requests has no negative values + // } else if (name == ov::num_streams) { + // const auto streams = engConfig.streamExecutorConfig.get_streams(); + // return decltype(ov::num_streams)::value_type( + // streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) + // OPENVINO_SUPPRESS_DEPRECATED_START + // } else if (name == ov::affinity) { + // const auto affinity = engConfig.threadBindingType; + // switch (affinity) { + // case IStreamsExecutor::ThreadBindingType::NONE: + // return ov::Affinity::NONE; + // case IStreamsExecutor::ThreadBindingType::CORES: + // return ov::Affinity::CORE; + // case IStreamsExecutor::ThreadBindingType::NUMA: + // return ov::Affinity::NUMA; + // case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: + // return ov::Affinity::HYBRID_AWARE; + // } + // return ov::Affinity::NONE; + // OPENVINO_SUPPRESS_DEPRECATED_END + // } else if (name == ov::device::id.name()) { + // return decltype(ov::device::id)::value_type{engConfig.device_id}; + // } else if (name == ov::inference_num_threads) { + // const auto threads = engConfig.streamExecutorConfig.get_threads(); + // return decltype(ov::inference_num_threads)::value_type(threads); + // } else if (name == ov::enable_profiling.name()) { + // const bool perfCount = engConfig.collectPerfCounters; + // return decltype(ov::enable_profiling)::value_type(perfCount); + // } else if (name == ov::hint::inference_precision) { + // return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision); + // } else if (name == ov::hint::performance_mode) { + // return engConfig.hintPerfMode; + // } else if (name == ov::hint::enable_cpu_pinning) { + // const bool pin_value = engConfig.enableCpuPinning; + // return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value); + // } else if (name == ov::hint::scheduling_core_type) { + // const auto core_type = 
engConfig.schedulingCoreType; + // return core_type; + // } else if (name == ov::hint::model_distribution_policy) { + // const auto& distribution_policy = engConfig.modelDistributionPolicy; + // return distribution_policy; + // } else if (name == ov::hint::enable_hyper_threading) { + // const bool ht_value = engConfig.enableHyperThreading; + // return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); + // } else if (name == ov::hint::num_requests) { + // return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); + // } else if (name == ov::hint::execution_mode) { + // return engConfig.executionMode; + // } else if (name == ov::log::level) { + // return engConfig.logLevel; + // } else if (name == ov::internal::exclusive_async_requests.name()) { + // return engConfig.exclusiveAsyncRequests; + // } else if (name == ov::hint::dynamic_quantization_group_size) { + // return decltype(ov::hint::dynamic_quantization_group_size)::value_type( + // engConfig.fcDynamicQuantizationGroupSize); + // } else if (name == ov::hint::kv_cache_precision) { + // return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); + + if (name == ov::internal::compiled_model_runtime_properties.name()) { auto model_runtime_properties = ov::Any(m_compiled_model_runtime_properties); return decltype(ov::internal::compiled_model_runtime_properties)::value_type( std::move(model_runtime_properties.as())); - } else if (name == ov::log::level) { - return engConfig.logLevel; } else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) { ov::Any res = true; auto it = options.find(ov::internal::compiled_model_runtime_properties.name()); @@ -383,13 +404,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } } return res; - } else if (name == ov::internal::exclusive_async_requests.name()) { - return engConfig.exclusiveAsyncRequests; - } else if (name == ov::hint::dynamic_quantization_group_size) { - return decltype(ov::hint::dynamic_quantization_group_size)::value_type( - engConfig.fcDynamicQuantizationGroupSize); - } else if (name == ov::hint::kv_cache_precision) { - return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); + } return get_ro_property(name, options); } @@ -485,12 +500,6 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (name == ov::internal::caching_properties) { std::vector cachingProperties = {ov::device::full_name}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); - } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(engConfig.denormalsOptMode == - Config::DenormalsOptMode::DO_On); - } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( - engConfig.fcSparseWeiDecompressionRate); } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{get_device_name()}; } else if (name == ov::device::type) { @@ -511,27 +520,27 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio #endif } - OPENVINO_THROW("Cannot get unsupported property: ", name); + return m_plugin_config.get_property(name, OptionVisibility::RELEASE); + // OPENVINO_THROW("Cannot get unsupported property: ", name); } -ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const 
ov::AnyMap& config) const { +ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const { WeightsSharing::Ptr fake_w_cache; if (model == nullptr) { OPENVINO_THROW("Only ngraph-based models are supported!"); } - Config conf = engConfig; - Config::ModelType modelType = getModelType(model); - conf.applyRtInfo(model); - conf.readProperties(config, modelType); - - auto context = std::make_shared(conf, fake_w_cache, false); + Config config = m_plugin_config; + config.set_property(properties, OptionVisibility::RELEASE); + config.modelType = getModelType(model); + config.finalize(get_default_context(), get_rt_info(*model)); + auto context = std::make_shared(config, fake_w_cache, false); auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { - Transformations transformation(model, conf); + Transformations transformation(model, config); transformation.UpToLpt(); transformation.PostLpt(); transformation.Snippets(); @@ -555,22 +564,30 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { +std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& properties) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); CacheDecrypt decrypt{codec_xor}; bool decript_from_string = false; - if (config.count(ov::cache_encryption_callbacks.name())) { - auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); + if (properties.count(ov::cache_encryption_callbacks.name())) { + auto encryption_callbacks = properties.at(ov::cache_encryption_callbacks.name()).as(); decrypt.m_decrypt_str = encryption_callbacks.decrypt; decript_from_string = true; } - auto _config = config; + auto _properties = properties; std::shared_ptr model_buffer; - if (_config.count(ov::internal::cached_model_buffer.name())) { - model_buffer = _config.at(ov::internal::cached_model_buffer.name()).as>(); - _config.erase(ov::internal::cached_model_buffer.name()); + if (_properties.count(ov::internal::cached_model_buffer.name())) { + model_buffer = _properties.at(ov::internal::cached_model_buffer.name()).as>(); + _properties.erase(ov::internal::cached_model_buffer.name()); + } + + // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. + const auto& it = _properties.find(ov::loaded_from_cache.name()); + bool loaded_from_cache = false; + if (it != _properties.end()) { + loaded_from_cache = it->second.as(); + _properties.erase(it); } ModelDeserializer deserializer( @@ -585,23 +602,18 @@ std::shared_ptr Plugin::import_model(std::istream& model_str std::shared_ptr model; deserializer >> model; - Config conf = engConfig; - Config::ModelType modelType = getModelType(model); - conf.applyRtInfo(model); - // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. 
-    const auto& it = _config.find(ov::loaded_from_cache.name());
-    bool loaded_from_cache = false;
-    if (it != _config.end()) {
-        loaded_from_cache = it->second.as();
-        _config.erase(it);
-    }
-    conf.readProperties(_config, modelType);
+    Config config = m_plugin_config;
+    config.set_property(properties, OptionVisibility::RELEASE);
+    config.modelType = getModelType(model); // import config props from caching model
-    calculate_streams(conf, model, true);
-    auto compiled_model = std::make_shared<CompiledModel>(model, shared_from_this(), conf, loaded_from_cache);
+    calculate_streams(config, model, true);
+    config.finalize(get_default_context(), get_rt_info(*model));
+
+    auto compiled_model = std::make_shared<CompiledModel>(model, shared_from_this(), config, loaded_from_cache);
     return compiled_model;
 }
+
 } // namespace intel_cpu
 } // namespace ov
diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h
index a67602ec4ece12..a8c596b4acdaad 100644
--- a/src/plugins/intel_cpu/src/plugin.h
+++ b/src/plugins/intel_cpu/src/plugin.h
@@ -6,6 +6,7 @@
 #include "compiled_model.h"
 #include "openvino/runtime/threading/cpu_message.hpp"
+#include "remote_context.hpp"
 namespace ov {
 namespace intel_cpu {
@@ -43,19 +44,22 @@ class Plugin : public ov::IPlugin {
     std::shared_ptr m_msg_manager;
+    std::shared_ptr<RemoteContextImpl> get_default_context() const {
+        return m_remote_context;
+    }
+
 private:
     ov::Any get_ro_property(const std::string& name, const ov::AnyMap& options) const;
     void get_performance_streams(Config& config, const std::shared_ptr<ov::Model>& model) const;
     void calculate_streams(Config& conf, const std::shared_ptr<ov::Model>& model, bool imported = false) const;
-    Config engConfig;
-    /* Explicily configured streams have higher priority than performance hints.
-       So track if streams is set explicitly (not auto-configured) */
-    bool streamsExplicitlySetForEngine = false;
+    Config m_plugin_config;
     const std::string deviceFullName;
     ov::AnyMap m_compiled_model_runtime_properties;
     std::shared_ptr specialSetup;
+
+    std::shared_ptr<RemoteContextImpl> m_remote_context;
 };
 } // namespace intel_cpu
 } // namespace ov
diff --git a/src/plugins/intel_cpu/src/remote_context.cpp b/src/plugins/intel_cpu/src/remote_context.cpp
new file mode 100644
index 00000000000000..4c457c8d634e3d
--- /dev/null
+++ b/src/plugins/intel_cpu/src/remote_context.cpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "remote_context.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+RemoteContextImpl::RemoteContextImpl(const std::string& device_name) : m_device_name(device_name) {}
+
+const ov::AnyMap& RemoteContextImpl::get_property() const {
+    // Return a reference to a static empty map: returning `{}` directly would bind the
+    // returned reference to a temporary and dangle.
+    static const ov::AnyMap empty_properties;
+    return empty_properties;
+}
+
+ov::SoPtr<ov::ITensor> RemoteContextImpl::create_tensor(const ov::element::Type& type,
+                                                        const ov::Shape& shape,
+                                                        const ov::AnyMap& params) {
+    // TODO: should we check that `params` is empty here?
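+    // Note: the CPU plugin has no dedicated device memory, so a "remote" tensor here
+    // degenerates to an ordinary host tensor and `params` is currently ignored.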
+    return create_host_tensor(type, shape);
+}
+
+const std::string& RemoteContextImpl::get_device_name() const {
+    return m_device_name;
+}
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/remote_context.hpp b/src/plugins/intel_cpu/src/remote_context.hpp
new file mode 100644
index 00000000000000..6cc58e34c96473
--- /dev/null
+++ b/src/plugins/intel_cpu/src/remote_context.hpp
@@ -0,0 +1,46 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/runtime/iremote_context.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class RemoteContextImpl : public ov::IRemoteContext {
+public:
+    RemoteContextImpl(const std::string& device_name);
+
+    /**
+     * @brief Returns the name of the device on which the underlying object is allocated.
+     * @return A device name string in fully specified format `<device_name>[.<device_id>[.<tile_id>]]` (e.g. GPU.0.1).
+     */
+    const std::string& get_device_name() const override;
+
+    /**
+     * @brief Returns a map of device-specific parameters.
+     * @return A map of name/Any elements.
+     */
+    const ov::AnyMap& get_property() const override;
+
+    /**
+     * @brief Allocates a memory tensor in device memory or wraps a user-supplied memory handle
+     * using the specified tensor description and low-level device-specific parameters.
+     * Returns a pointer to the object that implements the RemoteTensor interface.
+     * @param type Defines the element type of the tensor.
+     * @param shape Defines the shape of the tensor.
+     * @param params Map of the low-level tensor object parameters.
+     * @return Pointer to a plugin object that implements the RemoteTensor interface.
+     */
+    ov::SoPtr<ov::ITensor> create_tensor(const ov::element::Type& type,
+                                         const ov::Shape& shape,
+                                         const ov::AnyMap& params) override;
+
+private:
+    std::string m_device_name;
+};
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 9793c63de821ec..4159a6a2dcaa0b 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -61,7 +61,7 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model>& model, const C
                                    IC,
                                    OC,
                                    G,
-                                   config.inferencePrecision);
+                                   config.get_inference_precision());
         });
     CPU_REGISTER_PASS_X64(manager, pass::ConvertFCToFCQuantizedLegacy);
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index 469abbd99eb149..0872472ac47dfa 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -305,9 +305,9 @@ void Transformations::UpToLpt() {
                                                             levels::int8,
                                                             levels::int8_narrow_range};
-    const bool useLpt = config.lpTransformsMode == Config::LPTransformsMode::On &&
+    const bool useLpt = config.get_lp_transforms_mode() &&
                         LowPrecision::isFunctionQuantized(model, supported_fq_levels) &&
-                        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);
+                        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config, Lpt);
     const auto defaultPrecisions = useLpt ?
precision_set::get_int8_support() : std::vector<ov::element::Type>{};
@@ -397,7 +397,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis
         if (!hasHardwareSupport(ov::element::bf16))
             map.insert({ov::element::bf16, ov::element::f32});
         // TODO: Remove 'hasHardwareSupport' when all nodes are able to handle f16 properly.
-        if (!one_of(config.inferencePrecision, element::f16, element::undefined) || !hasHardwareSupport(element::f16)) {
+        if (!one_of(config.get_inference_precision(), element::f16, element::undefined) || !hasHardwareSupport(element::f16)) {
             map.insert({ov::element::f16, ov::element::f32});
         }
         return map;
@@ -407,7 +407,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis
     // It cannot be static data, because it may be different for different inferencePrecision values
     const auto precisions = get_convert_precisions();
-    if (config.inferencePrecision == ov::element::f16) {
+    if (config.get_inference_precision() == ov::element::f16) {
         precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}};
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
         type_to_fuse_map fuse_map = {{ov::opset1::FakeQuantize::get_type_info_static(), fuse_type_to_fq}};
@@ -609,12 +609,12 @@ void Transformations::PreLpt(const std::vector& defaultPrecis
                // snippets pipeline as well, where MVN is decomposed to simple ops, these simple ops will not be
                // tokenized into a subgraph again.
                // CVS-134277: once GN is fully enabled as snippets, disable this GroupNormalizationDecomposition entirely.
-               if (node->is_dynamic() || !one_of(config.inferencePrecision, element::f32, element::undefined) ||
-                   config.snippetsMode == Config::SnippetsMode::Disable)
+               if (node->is_dynamic() || !one_of(config.get_inference_precision(), element::f32, element::undefined) ||
+                   config.get_snippets_mode() == SnippetsMode::DISABLE)
                    return false;
-               if (config.snippetsMode != Config::SnippetsMode::IgnoreCallback) {
+               if (config.get_snippets_mode() != SnippetsMode::IGNORE_CALLBACK) {
                    const auto group_norm = ov::as_type_ptr(node);
-                   if (!group_norm || !implication(config.inferencePrecision == element::undefined,
+                   if (!group_norm || !implication(config.get_inference_precision() == element::undefined,
                                                    group_norm->get_element_type() == element::f32))
                        return false;
                    const auto num_groups = static_cast(group_norm->get_num_groups());
@@ -902,12 +902,12 @@ void Transformations::PostLpt() {
#if defined(OPENVINO_ARCH_X86_64)
    // MLP & QKV fusion optimizations are focused on throughput and are only enabled for AMX-bf16 & LLM serving use cases.
    auto can_use_amx_bf16_int8 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) &&
-                                (config.inferencePrecision == element::bf16);
+                                (config.get_inference_precision() == element::bf16);
    auto can_use_amx_fp16 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16) &&
-                           (config.inferencePrecision == element::f16);
+                           (config.get_inference_precision() == element::f16);
    if (can_use_amx_bf16_int8 || can_use_amx_fp16) {
-        const auto fcDynamicQuantizationGroupSize = config.fcDynamicQuantizationGroupSize;
+        const auto fcDynamicQuantizationGroupSize = config.get_dynamic_quantization_group_size();
        CPU_REGISTER_PASS_X64(postLPTPassManager, MLPFusion);
        CPU_SET_CALLBACK_X64(
            postLPTPassManager,
@@ -961,7 +961,7 @@ void Transformations::PostLpt() {
                             ov::intel_cpu::DecomposeRMSNorm);
    // Mark up RoPE inputs when running BF16/F16 inference.
-    if (one_of(config.inferencePrecision, ov::element::bf16, ov::element::f16)) {
+    if (one_of(config.get_inference_precision(), ov::element::bf16, ov::element::f16)) {
         CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::MarkRopeInputsToKeepInMixedPrecision);
         CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::MarkFloatingPointRange);
     }
@@ -984,11 +984,11 @@ void Transformations::MainSnippets(void) {
         return false;
     };
-    if (config.snippetsMode == Config::SnippetsMode::Disable || !is_supported_isa())
+    if (config.get_snippets_mode() == SnippetsMode::DISABLE || !is_supported_isa())
         return;
     // TODO [123659] Implement common logic to split optimization and limitation conditions
-    const auto ignoreCallback = config.snippetsMode == Config::SnippetsMode::IgnoreCallback;
+    const auto ignoreCallback = config.get_snippets_mode() == SnippetsMode::IGNORE_CALLBACK;
     // [111813]: At the moment Snippets supports Transpose on output of MHA pattern only if it is a single node between
     // MatMul and Result. However there may be Convert [f32->bf16] before Result since:
     // - CPU Node Subgraph requires bf16 on output when inference precision is bf16.
     // To avoid situations where Transpose is not the only node between MatMul and Result,
     // Plugin disables Transpose tokenization on output
-    bool mha_token_enable_transpose_on_output = one_of(config.inferencePrecision, element::f32, element::undefined);
+    bool mha_token_enable_transpose_on_output = one_of(config.get_inference_precision(), element::f32, element::undefined);
     size_t concurrency = config.streamExecutorConfig.get_threads_per_stream();
     if (concurrency == 0)
         concurrency = parallel_get_max_threads();
@@ -1004,7 +1004,7 @@ void Transformations::MainSnippets(void) {
     // Runtime caching should be enabled in case of dynamic Subgraphs in CPU Plugin: to reduce overheads of
     // ShapeInference and CodeGeneration. If the runtime cache capacity is zero, it means that rtCache won't be used and
     // we shouldn't tokenize dynamic Subgraphs, since that would lead to performance degradation
-    bool is_dynamic_mha_token_enabled = config.rtCacheCapacity != 0;
+    bool is_dynamic_mha_token_enabled = config.get_cpu_runtime_cache_capacity() != 0;
#if defined(OPENVINO_ARCH_ARM64)
    // ARM has 32 gprs. After excluding 2 registers for work amounts, 1 register for runtime parameters, 1 platform
    // register, 3 registers for temporary use, and 2 stack-related registers, it has 23 remaining registers.
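    // (that is: 32 - 2 - 1 - 1 - 3 - 2 = 23)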
@@ -1036,7 +1036,7 @@ void Transformations::MainSnippets(void) { #if defined(OPENVINO_ARCH_ARM64) CPU_REGISTER_PASS_ARM(snippetsManager, SnippetsMarkSkipped); #else - CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, config.inferencePrecision == ov::element::bf16); + CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, config.get_inference_precision() == ov::element::bf16); #endif CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeFCSnippets); } @@ -1048,11 +1048,11 @@ void Transformations::MainSnippets(void) { false; #else (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) && - one_of(config.inferencePrecision, ov::element::f32, element::undefined)) || + one_of(config.get_inference_precision(), ov::element::f32, element::undefined)) || (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) && - one_of(config.inferencePrecision, ov::element::bf16, ov::element::f32, element::undefined)) || + one_of(config.get_inference_precision(), ov::element::bf16, ov::element::f32, element::undefined)) || (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16) && - one_of(config.inferencePrecision, ov::element::f16)); + one_of(config.get_inference_precision(), ov::element::f16)); #endif if (!isMHASupported) { CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeMHASnippets); @@ -1067,13 +1067,13 @@ void Transformations::MainSnippets(void) { const auto in_type0 = matmul->get_input_element_type(0); const auto in_type1 = matmul->get_input_element_type(1); const auto is_fp32 = (in_type0 == ov::element::f32 && in_type1 == ov::element::f32 && - one_of(config.inferencePrecision, element::f32, element::undefined)); + one_of(config.get_inference_precision(), element::f32, element::undefined)); const auto is_fp16 = (in_type0 == ov::element::f16 || in_type1 == ov::element::f16) || - (in_type0 == element::f32 && in_type1 == ov::element::f32 && config.inferencePrecision == ov::element::f16); + (in_type0 == element::f32 && in_type1 == ov::element::f32 && config.get_inference_precision() == ov::element::f16); const auto is_bf16 = (in_type0 == ov::element::bf16 && in_type1 == ov::element::bf16) || ((in_type0 == element::f32 && in_type1 == ov::element::f32 && - config.inferencePrecision == ov::element::bf16)); + config.get_inference_precision() == ov::element::bf16)); const auto is_int8 = in_type0 == ov::element::i8; if (is_fp32) return true; @@ -1240,7 +1240,7 @@ void Transformations::MainSnippets(void) { snippets::pass::TokenizeSnippets); auto mm_supports_transpose_b = [this, ignoreCallback](const std::shared_ptr& n) { - MAYBE_UNUSED(config.inferencePrecision); + MAYBE_UNUSED(config.get_inference_precision()); if (!ignoreCallback) return false; // Note: BrgemmTPP doesn't support transposed KN natively @@ -1260,7 +1260,7 @@ void Transformations::MainSnippets(void) { } ov::element::TypeVector precisions; auto push_precision = [&](const ov::element::Type& precision) { - if (config.inferencePrecision == ov::element::bf16 && precision == ov::element::f32) + if (config.get_inference_precision() == ov::element::bf16 && precision == ov::element::f32) precisions.push_back(ov::element::bf16); else precisions.push_back(precision); @@ -1300,7 +1300,7 @@ void Transformations::PostSnippets(void) { } void Transformations::Snippets(void) { - const bool useSnippets = config.snippetsMode != Config::SnippetsMode::Disable && + const bool useSnippets = config.get_snippets_mode() != SnippetsMode::DISABLE && 
CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config, Snippets);
     if (!useSnippets)
         return;
diff --git a/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp b/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp
index 356ca2f1141f82..610dcbff1de310 100644
--- a/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp
+++ b/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp
@@ -98,25 +98,25 @@ class TransformationDumper {
 } // namespace ov
 // 'EXPAND' wrapper is necessary to ensure __VA_ARGS__ behaves the same on all the platforms
-# define CPU_DEBUG_CAP_EXPAND(x) x
-# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \
-    _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type]
-# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) \
-    CPU_DEBUG_CAP_EXPAND(!CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__))
-# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \
-    OPENVINO_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \
-    auto dumperPtr = \
-        _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] \
-            ? std::unique_ptr<TransformationDumper>( \
-                  new TransformationDumper(_this->config.debugCaps, \
-                                           DebugCapsConfig::TransformationFilter::Type::_type, \
-                                           _this->model)) \
-            : nullptr
-# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \
-    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \
-        return; \
-    CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
-#else
+// # define CPU_DEBUG_CAP_EXPAND(x) x
+// # define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \
+//     _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type]
+// # define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) \
+//     CPU_DEBUG_CAP_EXPAND(!CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__))
+// # define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \
+//     OPENVINO_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \
+//     auto dumperPtr = \
+//         _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] \
+//             ? std::unique_ptr<TransformationDumper>( \
+//                   new TransformationDumper(_this->config.debugCaps, \
+//                                            DebugCapsConfig::TransformationFilter::Type::_type, \
+//                                            _this->model)) \
+//             : nullptr
+// # define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \
+//     if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \
+//         return; \
+//     CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
+// #else
 # define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) false
 # define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...)
true # define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp index 6e8a1a798256ed..f1175e09322f16 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp +++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp @@ -24,20 +24,16 @@ static void formatNodeName(std::string& name) { std::replace(name.begin(), name.end(), ':', '-'); } -static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, const std::string& portsKind) { - const auto& dumpFilters = config.blobDumpFilters; - - if (dumpFilters.empty()) - return false; - - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_PORTS)) { // filter by ports configured - if (dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS) != "ALL" && - portsKind != dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS)) +static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) { + const auto& filter_by_ports = config.get_blob_dump_node_ports(); + if (!filter_by_ports.empty()) { // filter by ports configured + if (filter_by_ports != "ALL" && portsKind != filter_by_ports) return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_EXEC_ID)) { // filter by exec id configured - std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_EXEC_ID)); + const auto& filter_by_exec_id = config.get_blob_dump_node_exec_id(); + if (!filter_by_exec_id.empty()) { // filter by exec id configured + std::stringstream ss(filter_by_exec_id); int id; bool matched = false; @@ -52,8 +48,9 @@ static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, c return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_TYPE)) { // filter by type configured - std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_TYPE)); + const auto& filter_by_type = config.get_blob_dump_node_type(); + if (!filter_by_type.empty()) { // filter by type configured + std::stringstream ss(filter_by_type); std::string type; bool matched = false; @@ -68,24 +65,25 @@ static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, c return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_NAME)) { // filter by name configured - if (dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME) != + const auto& filter_by_name = config.get_blob_dump_node_name(); + if (!filter_by_name.empty()) { // filter by name configured + if (filter_by_name != "*" && // to have 'single char' option for matching all the names !std::regex_match(node->getName(), - std::regex(dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME)))) // name does not match + std::regex(filter_by_name))) // name does not match return false; } return true; } -static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsConfig& config) { - switch (config.blobDumpFormat) { - case DebugCapsConfig::FORMAT::BIN: { +static void dump(const BlobDumper& bd, const std::string& file, const Config& config) { + switch (config.get_blob_dump_format()) { + case BlobDumpFormat::BIN: { bd.dump(file); break; } - case DebugCapsConfig::FORMAT::TEXT: { + case BlobDumpFormat::TEXT: { bd.dumpAsTxt(file); break; } @@ -94,7 +92,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsC } } -static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config) { +static void dumpInternalBlobs(const NodePtr& node, const Config& config) { std::string nodeName = node->getName(); formatNodeName(nodeName); @@ -103,7 
+101,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config for (size_t i = 0; i < internalBlobs.size(); i++) { const auto& blb = internalBlobs[i]; std::string file_name = NameFromType(node->getType()) + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb"; - auto dump_file = config.blobDumpDir + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name; if (blb->getDesc().getPrecision() == ov::element::u1) continue; @@ -113,7 +111,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config } } -void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) { +void dumpInputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "IN")) return; @@ -133,7 +131,7 @@ void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int coun if (file_name.size() > 240) file_name = file_name.substr(file_name.size() - 240); - auto dump_file = config.blobDumpDir + "/#" + exec_order + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; auto& desc = prEdge->getMemory().getDesc(); @@ -147,7 +145,7 @@ void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int coun dumpInternalBlobs(node, config); } -void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) { +void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "OUT")) return; @@ -166,7 +164,7 @@ void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int cou if (file_name.size() > 240) file_name = file_name.substr(file_name.size() - 240); - auto dump_file = config.blobDumpDir + "/#" + exec_order + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; auto& desc = childEdge->getMemory().getDesc(); diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.h b/src/plugins/intel_cpu/src/utils/node_dumper.h index bed94930fa02c4..3d69486154ed23 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.h +++ b/src/plugins/intel_cpu/src/utils/node_dumper.h @@ -11,16 +11,16 @@ namespace ov { namespace intel_cpu { -void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count = -1); -void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count = -1); +void dumpInputBlobs(const NodePtr& node, const Config& config, int count = -1); +void dumpOutputBlobs(const NodePtr& node, const Config& config, int count = -1); class DumpHelper { const NodePtr& node; const int count; - const DebugCapsConfig& config; + const Config& config; public: - explicit DumpHelper(const NodePtr& _node, const DebugCapsConfig& _config, int _count = -1) + explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1) : node(_node), count(_count), config(_config) { diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 40a4fc4a1739c4..9329e415f49f6a 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -27,7 +27,7 @@ endif() target_link_libraries(cpuUtils PRIVATE ${CPU_UTILS_LINK_LIBRARIES}) target_include_directories(cpuUtils 
PUBLIC ${CPU_UTILS_INCLUDE_PATHS}) -set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $/src) +set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $/src $) set(DEPENDENCIES openvino_intel_cpu_plugin openvino_template_extension) set(LINK_LIBRARIES funcSharedTests cpuUtils openvino::snippets ov_snippets_models) diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 057869a864d87b..04080efdd990f3 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -98,6 +98,7 @@ endfunction() if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt) + create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/behavior/ov_executable_network ov_cpu_func_behavior) endif() # examples of targets: diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index 73086b78a0de95..5d30ef57700dd2 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -54,19 +54,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable ASSERT_EQ(supportedProperties, expectedSupportedProperties); } -TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkGetROPropertiesDoesNotThrow) { - ov::Core ie; - std::vector properties; - - ov::CompiledModel compiledModel = ie.compile_model(model, deviceName); - - OV_ASSERT_NO_THROW(properties = compiledModel.get_property(ov::supported_properties)); - - for (const auto& property : properties) { - OV_ASSERT_NO_THROW((void)compiledModel.get_property(property)); - } -} - TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSetROPropertiesThrow) { ov::Core ie; std::vector properties; From 0ce36c056047135b0fed3487001ae9ed9645ca4a Mon Sep 17 00:00:00 2001 From: dmitrygo Date: Thu, 2 Jan 2025 13:48:01 +0400 Subject: [PATCH 18/18] some fixes --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 4 ++-- src/plugins/intel_cpu/src/plugin.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index f8125083a2193b..b6d1a18c01e6f4 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -78,7 +78,7 @@ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) namespace ov { -#define ENABLE_DEBUG_CAPS + enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only @@ -247,7 +247,7 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_property(property(rt_info_val->second.template as())); + set_user_property(ov::AnyMap({property(rt_info_val->second.template as())}), 
OptionVisibility::RELEASE); } } } diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index f587712a9fe3c0..1359138b53fcc2 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -267,7 +267,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // conf.readProperties(config, modelType); Config config = m_plugin_config; - config.set_property(properties, OptionVisibility::RELEASE); + config.set_user_property(properties, OptionVisibility::RELEASE); config.modelType = getModelType(model); Transformations transformations(cloned_model, config); @@ -319,7 +319,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } void Plugin::set_property(const ov::AnyMap& config) { - m_plugin_config.set_property(config, OptionVisibility::RELEASE); + m_plugin_config.set_user_property(config, OptionVisibility::RELEASE); } ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { @@ -532,7 +532,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& } Config config = m_plugin_config; - config.set_property(properties, OptionVisibility::RELEASE); + config.set_user_property(properties, OptionVisibility::RELEASE); config.modelType = getModelType(model); config.finalize(get_default_context(), get_rt_info(*model)); auto context = std::make_shared(config, fake_w_cache, false); @@ -603,7 +603,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str deserializer >> model; Config config = m_plugin_config; - config.set_property(properties, OptionVisibility::RELEASE); + config.set_user_property(properties, OptionVisibility::RELEASE); config.modelType = getModelType(model); // import config props from caching model
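For illustration, a minimal end-to-end sketch of how the reworked configuration is exercised through the standard OpenVINO 2.0 API once this series is applied (property routing per the plugin code above; "model.xml" is a placeholder path):

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Routed to Plugin::set_property -> Config::set_user_property(..., OptionVisibility::RELEASE)
        core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
        auto model = core.read_model("model.xml");
        // Per-compilation overrides flow through Plugin::compile_model into the same Config
        auto compiled = core.compile_model(model, "CPU", ov::hint::inference_precision(ov::element::bf16));
        // Reads are served by Plugin::get_property, falling back to Config::get_property(..., RELEASE)
        auto nireq = compiled.get_property(ov::optimal_number_of_infer_requests);
        (void)nireq;
        return 0;
    }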