diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp
index 346819eced93e5..36e6a74880e115 100644
--- a/src/core/src/any.cpp
+++ b/src/core/src/any.cpp
@@ -6,6 +6,7 @@
 #include
 #include
+#include "openvino/util/common_util.hpp"

 namespace {
 template <class Container>
 bool contains_type_index(Container&& types, const std::type_info& user_type) {
@@ -200,9 +201,14 @@ namespace util {
 void Read<bool>::operator()(std::istream& is, bool& value) const {
     std::string str;
     is >> str;
-    if (str == "YES") {
+
+    std::set<std::string> off = {"0", "false", "off", "no"};
+    std::set<std::string> on = {"1", "true", "on", "yes"};
+    str = util::to_lower(str);
+
+    if (on.count(str)) {
         value = true;
-    } else if (str == "NO") {
+    } else if (off.count(str)) {
         value = false;
     } else {
         OPENVINO_THROW("Could not convert to bool from string " + str);
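For reference, the relaxed conversion above accepts "1/true/on/yes" and "0/false/off/no" case-insensitively, where only "YES"/"NO" were accepted before. A minimal standalone sketch of the same logic; `parse_bool` and `main` are illustrative names, not part of the patch:

// Standalone sketch mirroring the Read<bool> logic above.
#include <algorithm>
#include <cctype>
#include <set>
#include <stdexcept>
#include <string>

static bool parse_bool(std::string str) {
    // lowercase first, like util::to_lower() in the patch
    std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });
    const std::set<std::string> off = {"0", "false", "off", "no"};
    const std::set<std::string> on = {"1", "true", "on", "yes"};
    if (on.count(str))
        return true;
    if (off.count(str))
        return false;
    throw std::runtime_error("Could not convert to bool from string " + str);
}

int main() {
    // "YES" still works as before; "On" and "0" are newly accepted spellings
    return (parse_bool("YES") && parse_bool("On") && !parse_bool("0")) ? 0 : 1;
}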
diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt
index 5f40c6fa54b6d0..485acb6c31c955 100644
--- a/src/inference/CMakeLists.txt
+++ b/src/inference/CMakeLists.txt
@@ -83,11 +83,12 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE
 target_include_directories(${TARGET_NAME}_obj PRIVATE
     "${CMAKE_CURRENT_SOURCE_DIR}/src"
     $
+    $
     $<$:$> # for ov_plugins.hpp
     $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>)

-target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev)
+target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json)

 ov_mark_target_as_cc(${TARGET_NAME}_obj)

 # OpenVINO Runtime is public API => need to mark this library as important for ABI free
@@ -101,7 +102,7 @@ endif()

 # Create library file from object library
 add_library(${TARGET_NAME} INTERFACE)
-target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime)
+target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime openvino::shape_inference)
 target_include_directories(${TARGET_NAME} INTERFACE $)

 ov_add_clang_format_target(${TARGET_NAME}_clang FOR_SOURCES ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${PUBLIC_HEADERS})
@@ -122,14 +123,15 @@ if (TBBBIND_2_5_FOUND)
 endif()

 target_include_directories(${TARGET_NAME}_s PUBLIC
-    $)
+    $
+    $)

 if(WIN32)
     set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s)
 endif()

 target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS}
-    openvino::runtime::dev openvino::pugixml)
+    openvino::runtime::dev openvino::pugixml openvino::shape_inference)

 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
new file mode 100644
index 00000000000000..b6d1a18c01e6f4
--- /dev/null
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -0,0 +1,274 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include "openvino/core/attribute_visitor.hpp"
+#include "openvino/runtime/iremote_context.hpp"
+#include "openvino/runtime/properties.hpp"
+#include "openvino/core/except.hpp"
+#include
+
+#ifndef COUNT_N
+    #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N
+#endif
+
+#ifndef COUNT
+    #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1))
+#endif
+
+#ifndef CAT
+    #define CAT(a, b) a ## b
+#endif
+
+#ifndef EXPAND
+    #define EXPAND(N) N
+#endif
+
+#define GET_EXCEPT_LAST_IMPL(N, ...) \
+    CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__)
+#define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0
+#define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1
+#define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2
+
+#define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__))
+
+#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)
+#define GET_LAST_IMPL_0(_0, ...) _0
+#define GET_LAST_IMPL_1(_0, _1, ...) _1
+#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2
+#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3
+#define GET_LAST_IMPL_4(_0, _1, _2, _3, _4, ...) _4
+#define GET_LAST_IMPL_5(_0, _1, _2, _3, _4, _5, ...) _5
+#define GET_LAST_IMPL_6(_0, _1, _2, _3, _4, _5, _6, ...) _6
+
+#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,)
+
+#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \
+    ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type, Visibility> m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)};
+
+#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \
+    const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \
+        if (m_is_finalized) { \
+            return m_ ## PropertyVar.value; \
+        } else { \
+            if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \
+                return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as<decltype(PropertyNamespace::PropertyVar)::value_type>(); \
+            } else { \
+                return m_ ## PropertyVar.value; \
+            } \
+        } \
+    }
+
+#define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \
+    m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar;
+
+#define OV_CONFIG_OPTION_HELP(PropertyNamespace, PropertyVar, Visibility, DefaultValue, ...) \
+    { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)},
+
+#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__)
+
+#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__)
+
+#define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__)
+
+namespace ov {
+
+enum class OptionVisibility : uint8_t {
+    RELEASE = 1 << 0,           // Option can be set for any build type via public interface, environment and config file
+    RELEASE_INTERNAL = 1 << 1,  // Option can be set for any build type via environment and config file only
+    DEBUG = 1 << 2,             // Option can be set for debug builds only via environment and config file
+#ifdef ENABLE_DEBUG_CAPS
+    ANY = 0x07,                 // Any visibility is valid including DEBUG
+#else
+    ANY = 0x03,                 // Any visibility is valid excluding DEBUG
+#endif
+};
+
+inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator~(OptionVisibility a) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(~static_cast<underlying_type>(a));
+}
+
+inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) {
+    switch (visibility) {
+    case OptionVisibility::RELEASE: os << "RELEASE"; break;
+    case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break;
+    case OptionVisibility::DEBUG: os << "DEBUG"; break;
+    default: os << "UNKNOWN"; break;
+    }
+
+    return os;
+}
+
+struct ConfigOptionBase {
+    explicit ConfigOptionBase() {}
+    virtual ~ConfigOptionBase() = default;
+
+    virtual void set_any(const ov::Any any) = 0;
+    virtual ov::Any get_any() const = 0;
+    virtual bool is_valid_value(ov::Any val) = 0;
+    virtual OptionVisibility get_visibility() const = 0;
+};
+
+template <typename T, OptionVisibility visibility_>
+struct ConfigOption : public ConfigOptionBase {
+    ConfigOption(const T& default_val, std::function<bool(T)> validator = nullptr)
+        : ConfigOptionBase(), value(default_val), validator(validator) {}
+    T value;
+    constexpr static const auto visibility = visibility_;
+
+    void set_any(const ov::Any any) override {
+        if (validator) {
+            // TODO: is there any way to print the option name here?
+            OPENVINO_ASSERT(validator(any.as<T>()), "Invalid value: ", any.as<T>());
+        }
+        value = any.as<T>();
+    }
+
+    ov::Any get_any() const override {
+        return ov::Any(value);
+    }
+
+    bool is_valid_value(ov::Any val) override {
+        try {
+            auto v = val.as<T>();
+            return validator ? validator(v) : true;
+        } catch (std::exception&) {
+            return false;
+        }
+    }
+
+    OptionVisibility get_visibility() const override {
+        return visibility;
+    }
+
+    operator T() const {
+        return value;
+    }
+
+    ConfigOption& operator=(const T& val) {
+        value = val;
+        return *this;
+    }
+
+    bool operator==(const T& val) const {
+        return value == val;
+    }
+
+    bool operator!=(const T& val) const {
+        return !(*this == val);
+    }
+
+private:
+    std::function<bool(T)> validator;
+};
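To make the validator contract concrete, a hedged usage sketch; the option name and the non-negative range are made up for illustration, this is not code from the patch:

// Hypothetical option: an int32_t value that must be non-negative.
// ConfigOption and OptionVisibility are the types declared above.
ov::ConfigOption<int32_t, ov::OptionVisibility::RELEASE> num_requests{
    0,                                  // default value
    [](int32_t v) { return v >= 0; }};  // optional validator

void validator_example() {
    num_requests.set_any(ov::Any(4));   // passes validation, value becomes 4
    bool ok = num_requests.is_valid_value(ov::Any(-1));  // false, does not throw
    // num_requests.set_any(ov::Any(-1));  // would fail the OPENVINO_ASSERT
    (void)ok;
}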
+
+// Base class for configuration of plugins
+// Implementation should provide a list of properties with default values and validators (optional)
+// and prepare a map string property name -> ConfigOptionBase pointer
+// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class
+// and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls)
+// get/set property members are provided to handle external property access
+// The class provides helpers to read the properties from a configuration file and from environment variables
+//
+// Expected order of properties resolution:
+// 1. Assign default value for each property per device
+// 2. Save user properties passed via Core::set_property() call to user_properties
+// 3. Save user properties passed via Core::compile_model() call to user_properties
+// 4. Apply RT info properties to user_properties if they were not set by user
+// 5. Read and apply properties from the config file as user_properties
+// 6. Read and apply properties from the environment variables as user_properties
+// 7. Apply user_properties to actual plugin properties
+// 8. Update dependent properties if they were not set by user either way
+class OPENVINO_RUNTIME_API PluginConfig {
+public:
+    PluginConfig() {}
+    virtual ~PluginConfig() = default;
+
+    // Disable copy and move as we need to setup m_options_map properly and ensure that
+    // values are a part of the current config object
+    PluginConfig(const PluginConfig& other) = delete;
+    PluginConfig& operator=(const PluginConfig& other) = delete;
+    PluginConfig(PluginConfig&& other) = delete;
+    PluginConfig& operator=(PluginConfig&& other) = delete;
+
+    void set_property(const ov::AnyMap& properties);
+    void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true);
+    Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const;
+
+    template <typename... Properties>
+    util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
+        set_property(ov::AnyMap{std::forward<Properties>(properties)...});
+    }
+
+    std::string to_string() const;
+
+    void finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info);
+
+    bool visit_attributes(ov::AttributeVisitor& visitor);
+
+    template <typename T, PropertyMutability mutability>
+    bool is_set_by_user(const ov::Property<T, mutability>& property) const {
+        return m_user_properties.find(property.name()) != m_user_properties.end();
+    }
+
+protected:
+    virtual void apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {}
+    virtual void apply_debug_options(std::shared_ptr<IRemoteContext> context);
+    virtual void finalize_impl(std::shared_ptr<IRemoteContext> context) {}
+
+    ConfigOptionBase* get_option_ptr(const std::string& name) const {
+        auto it = m_options_map.find(name);
+        // TODO: print a more meaningful error message
+        OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name);
+        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
+
+        return it->second;
+    }
+
+    template <typename T, PropertyMutability mutability>
+    void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
+        if (!is_set_by_user(property)) {
+            auto rt_info_val = rt_info.find(property.name());
+            if (rt_info_val != rt_info.end()) {
+                set_user_property(ov::AnyMap({property(rt_info_val->second.template as<T>())}), OptionVisibility::RELEASE);
+            }
+        }
+    }
+
+    ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
+    ov::AnyMap read_env(const std::vector<std::string>& prefixes) const;
+    void cleanup_unsupported(ov::AnyMap& config) const;
+
+    std::map<std::string, ConfigOptionBase*> m_options_map;
+
+    // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info
+    ov::AnyMap m_user_properties;
+    using OptionMapEntry = decltype(m_options_map)::value_type;
+
+    // property variable name, string name, default value, description
+    using OptionsDesc = std::vector<std::tuple<std::string, std::string, std::string>>;
+    virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; }
+    const std::string get_help_message(const std::string& name = "") const;
+    void print_help() const;
+
+    bool m_is_finalized = false;
+};
+
+}  // namespace ov
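Putting the macros and the base class together, a derived config is expected to look roughly like the following sketch; `my_plugin::foo` is a made-up property used only to show the pattern (the unit tests further below follow the same structure):

// Hypothetical derived config; my_plugin::foo is illustrative only.
using namespace ov;  // macros reference unqualified ConfigOption/OptionVisibility

namespace my_plugin {
static constexpr ov::Property<bool> foo{"MY_PLUGIN_FOO"};  // made-up property
}  // namespace my_plugin

struct MyPluginConfig : public ov::PluginConfig {
    MyPluginConfig() {
        // Expand each option into an m_options_map entry.
        #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)
        OV_CONFIG_RELEASE_OPTION(my_plugin, foo, true, "Enables the foo feature")
        #undef OV_CONFIG_OPTION
    }

    // Expand the same list into a ConfigOption member (m_foo) plus a
    // get_foo() accessor that honors not-yet-finalized user properties.
    #define OV_CONFIG_OPTION(...) \
        OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
    OV_CONFIG_RELEASE_OPTION(my_plugin, foo, true, "Enables the foo feature")
    #undef OV_CONFIG_OPTION
};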
diff --git a/src/inference/include/openvino/runtime/intel_cpu/properties.hpp b/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
index 9d63a0e078bdef..ac0365c3409190 100644
--- a/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
+++ b/src/inference/include/openvino/runtime/intel_cpu/properties.hpp
@@ -12,6 +12,7 @@
 #pragma once

 #include "openvino/runtime/properties.hpp"
+#include "ov_optional.hpp"

 namespace ov {

@@ -45,7 +46,7 @@ namespace intel_cpu {
 * ie.set_property(ov::denormals_optimization(false)); // disable denormals optimization
 * @endcode
 */
-static constexpr Property<bool> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
+static constexpr Property<ov::optional<bool>> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};

 /**
  * @brief This property defines threshold for sparse weights decompression feature activation
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 8baea3ed408656..cb7afc22774df3 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -1209,7 +1209,7 @@ namespace streams {
 * @ingroup ov_runtime_cpp_prop_api
 */
 struct Num {
-    using Base = std::tuple<int32_t>;  //!< NumStreams is representable as int32_t
+    // using Base = std::tuple<int32_t>;  //!< NumStreams is representable as int32_t

     constexpr Num() : num{-1} {};
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
new file mode 100644
index 00000000000000..4ef8fe68c8c93f
--- /dev/null
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -0,0 +1,328 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/runtime/plugin_config.hpp"
+#include "openvino/core/any.hpp"
+#include "openvino/core/except.hpp"
+#include "openvino/runtime/device_id_parser.hpp"
+#include "openvino/util/common_util.hpp"
+#include "openvino/util/env_util.hpp"
+#include
+#include
+#include
+
+#ifdef JSON_HEADER
+#    include <json.hpp>
+#else
+#    include <nlohmann/json.hpp>
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#include <sys/ioctl.h>
+#endif
+
+namespace {
+size_t get_terminal_width() {
+    const size_t default_width = 120;
+#ifdef _WIN32
+    CONSOLE_SCREEN_BUFFER_INFO csbi;
+    if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
+        return csbi.srWindow.Right - csbi.srWindow.Left + 1;
+    } else {
+        return default_width;
+    }
+#else
+    struct winsize w;
+    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) {
+        return w.ws_col;
+    } else {
+        return default_width;
+    }
+#endif  // _WIN32
+}
+}
+
+namespace ov {
+
+ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const {
+    if (m_user_properties.find(name) != m_user_properties.end()) {
+        return m_user_properties.at(name);
+    }
+
+    auto option = get_option_ptr(name);
+    if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
+        OPENVINO_THROW("Couldn't get unknown property: ", name);
+    }
+
+    return option->get_any();
+}
+
+void PluginConfig::set_property(const ov::AnyMap& config) {
+    OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
+
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        get_option_ptr(name)->set_any(val);
+    }
+}
+
+void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
+    OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
+
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto val = kv.second;
+
+        // [WA] ov::Any cannot be cast from int to streams::Num
+        // Can be reproduced with CpuExecNetworkCheckModelStreamsHasHigherPriorityThanThroughputHint test
+        // Should be fixed before the merge
+        if (name == ov::num_streams.name()) {
+            val = val.as<ov::streams::Num>();
+        }
+
+        auto option = get_option_ptr(name);
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
+            if (throw_on_error)
+                OPENVINO_THROW("Couldn't set unknown property: ", name);
+            else
+                continue;
+        }
+        if (!option->is_valid_value(val)) {
+            if (throw_on_error)
+                OPENVINO_THROW("Invalid value: ", val.as<std::string>(), " for property: ", name, "\nProperty description: ", get_help_message(name));
+            else
+                continue;
+        }
+
+        m_user_properties[name] = val;
+    }
+}
+
+void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    if (m_is_finalized)
+        return;
+
+    apply_rt_info(context, rt_info);
+    apply_debug_options(context);
+    // Copy internal properties before applying hints to ensure that
+    // a property set by hint won't be overridden by a value in user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT
+    // If we apply hints first and then copy all values from user config to internal one,
+    // then we'll get num_streams=AUTO in final config while some integer number is expected.
+    for (const auto& prop : m_user_properties) {
+        auto& option = m_options_map.at(prop.first);
+        option->set_any(prop.second);
+    }
+
+    finalize_impl(context);
+
+    // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization
+    m_user_properties.clear();
+
+    m_is_finalized = true;
+}
+
+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
+    for (const auto& prop : m_user_properties) {
+        auto val = prop.second.as<std::string>();
+        visitor.on_attribute(prop.first + "__user", val);
+    }
+    for (const auto& prop : m_options_map) {
+        auto val = prop.second->get_any().as<std::string>();
+        visitor.on_attribute(prop.first + "__internal", val);
+    }
+    visitor.on_attribute("is_finalized", m_is_finalized);
+
+    return true;
+}
+
+void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
+    const bool throw_on_error = false;
+
+    if (context) {
+        ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
+        cleanup_unsupported(config_properties);
+#ifdef ENABLE_DEBUG_CAPS
+        for (auto& prop : config_properties) {
+            std::cout << "Non default config value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+        }
+#endif
+        set_user_property(config_properties, OptionVisibility::ANY, throw_on_error);
+    }
+
+    ov::AnyMap env_properties = read_env({"OV_"});
+    cleanup_unsupported(env_properties);
+#ifdef ENABLE_DEBUG_CAPS
+    for (auto& prop : env_properties) {
+        std::cout << "Non default env value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+    }
+#endif
+    set_user_property(env_properties, OptionVisibility::ANY, throw_on_error);
+}
+
+ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
+    ov::AnyMap config;
+
+    std::ifstream ifs(filename);
+    if (!ifs.is_open()) {
+        return config;
+    }
+
+    nlohmann::json json_config;
+    try {
+        ifs >> json_config;
+    } catch (const std::exception& e) {
+        return config;
+    }
+
+    DeviceIDParser parser(target_device_name);
+    for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
+        const std::string& device_name = item.key();
+        if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name())
+            continue;
+
+        const auto& item_value = item.value();
+        for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) {
+            config[option.key()] = option.value().get<std::string>();
+        }
+    }
+
+    return config;
+}
+
+ov::AnyMap PluginConfig::read_env(const std::vector<std::string>& prefixes) const {
+    ov::AnyMap config;
+
+    for (auto& kv : m_options_map) {
+        for (auto& prefix : prefixes) {
+            auto var_name = prefix + kv.first;
+            const auto& val = ov::util::getenv_string(var_name.c_str());
+
+            if (!val.empty()) {
+                if (dynamic_cast*>(kv.second) != nullptr) {
+                    const std::set<std::string> off = {"0", "false", "off", "no"};
+                    const std::set<std::string> on = {"1", "true", "on", "yes"};
+
+                    const auto& val_lower = ov::util::to_lower(val);
+                    if (off.count(val_lower)) {
+                        config[kv.first] = false;
+                    } else if (on.count(val_lower)) {
+                        config[kv.first] = true;
+                    } else {
+                        OPENVINO_THROW("Unexpected value for boolean property: ", val);
+                    }
+                } else {
+                    config[kv.first] = val;
+                }
+                break;
+            }
+        }
+    }
+
+    return config;
+}
+
+void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const {
+    for (auto it = config.begin(); it != config.end();) {
+        auto& name = it->first;
+        auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        if (opt_it == m_options_map.end()) {
+            it = config.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
+std::string PluginConfig::to_string() const {
+    std::stringstream ss;
+
+    ss << "-----------------------------------------\n";
+    ss << "PROPERTIES:\n";
+
+    for (const auto& option : m_options_map) {
+        ss << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
+    }
+    ss << "USER PROPERTIES:\n";
+    for (const auto& user_prop : m_user_properties) {
+        ss << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+    }
+
+    return ss.str();
+}
+
+void PluginConfig::print_help() const {
+    auto format_text = [](const std::string& cpp_name, const std::string& str_name, const std::string& desc, size_t max_name_width, size_t max_width) {
+        std::istringstream words(desc);
+        std::ostringstream formatted_text;
+        std::string word;
+        std::vector<std::string> words_vec;
+
+        while (words >> word) {
+            words_vec.push_back(word);
+        }
+
+        size_t j = 0;
+        size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width;
+        for (size_t i = 0; i < std::max<size_t>(2, count_of_desc_lines); i++) {
+            if (i == 0) {
+                formatted_text << std::left << std::setw(max_name_width) << cpp_name;
+            } else if (i == 1) {
+                formatted_text << std::left << std::setw(max_name_width) << str_name;
+            } else {
+                formatted_text << std::left << std::setw(max_name_width) << "";
+            }
+
+            formatted_text << " | ";
+
+            size_t line_length = max_name_width + 3;
+            for (; j < words_vec.size();) {
+                line_length += words_vec[j].size() + 1;
+                if (line_length > max_width) {
+                    break;
+                } else {
+                    formatted_text << words_vec[j] << " ";
+                }
+                j++;
+            }
+            formatted_text << "\n";
+        }
+        return formatted_text.str();
+    };
+
+    const auto& options_desc = get_options_desc();
+    std::stringstream ss;
+    auto max_name_length_item = std::max_element(options_desc.begin(), options_desc.end(),
+        [](const OptionsDesc::value_type& a, const OptionsDesc::value_type& b) {
+            return std::get<0>(a).size() < std::get<0>(b).size();
+        });
+
+    const size_t max_name_width = static_cast<size_t>(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size());
+    const size_t terminal_width = get_terminal_width();
+    ss << std::left << std::setw(max_name_width) << "Option name" << " | " << " Description " << "\n";
+    ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n";
+    for (auto& kv : options_desc) {
+        ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n";
+    }
+
+    std::cout << ss.str();
+}
+
+const std::string PluginConfig::get_help_message(const std::string& name) const {
+    const auto& options_desc = get_options_desc();
+    auto it = std::find_if(options_desc.begin(), options_desc.end(), [&](const OptionsDesc::value_type& v) { return std::get<1>(v) == name; });
+    if (it != options_desc.end()) {
+        return std::get<2>(*it);
+    }
+
+    return "";
+}
+
+}  // namespace ov
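The environment override in read_env() above boils down to a prefix + option-name lookup. A standalone sketch of that naming convention; the option name is borrowed from the unit tests below, and the getenv use is simplified relative to ov::util::getenv_string:

// Sketch of the prefix + name lookup pattern used by read_env() above.
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

int main() {
    // e.g. `export OV_BOOL_PROPERTY=off` would be picked up here
    const std::vector<std::string> prefixes = {"OV_"};
    const std::string option_name = "BOOL_PROPERTY";  // a key from m_options_map
    for (const auto& prefix : prefixes) {
        if (const char* val = std::getenv((prefix + option_name).c_str())) {
            std::cout << option_name << " overridden to '" << val << "'\n";
            break;  // first matching prefix wins, as in read_env()
        }
    }
    return 0;
}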
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
new file mode 100644
index 00000000000000..d2c99585ab015b
--- /dev/null
+++ b/src/inference/tests/unit/config_test.cpp
@@ -0,0 +1,216 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/core/any.hpp"
+#include "openvino/runtime/plugin_config.hpp"
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+using namespace ::testing;
+using namespace ov;
+
+static constexpr Property<float> unsupported_property{"UNSUPPORTED_PROPERTY"};
+static constexpr Property<bool> bool_property{"BOOL_PROPERTY"};
+static constexpr Property<int32_t> int_property{"INT_PROPERTY"};
+static constexpr Property<std::string> high_level_property{"HIGH_LEVEL_PROPERTY"};
+static constexpr Property<std::string> low_level_property{"LOW_LEVEL_PROPERTY"};
+static constexpr Property<int32_t> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
+static constexpr Property<int32_t> debug_property{"DEBUG_PROPERTY"};
+
+struct EmptyTestConfig : public ov::PluginConfig {
+    std::vector<std::string> get_supported_properties() const {
+        std::vector<std::string> supported_properties;
+        for (const auto& kv : m_options_map) {
+            supported_properties.push_back(kv.first);
+        }
+        return supported_properties;
+    }
+};
+
+struct NotEmptyTestConfig : public ov::PluginConfig {
+    NotEmptyTestConfig() {
+        #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)
+        OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
+        OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
+        OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
+        OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "")
+        OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
+        OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
+        #undef OV_CONFIG_OPTION
+    }
+
+    NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() {
+        m_user_properties = other.m_user_properties;
+        for (const auto& kv : other.m_options_map) {
+            m_options_map.at(kv.first)->set_any(kv.second->get_any());
+        }
+    }
+
+    #define OV_CONFIG_OPTION(...) \
+        OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
+    OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
+    OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
+    OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
+    OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "")
+    OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
+    OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
+    #undef OV_CONFIG_OPTION
+
+    std::vector<std::string> get_supported_properties() const {
+        std::vector<std::string> supported_properties;
+        for (const auto& kv : m_options_map) {
+            supported_properties.push_back(kv.first);
+        }
+        return supported_properties;
+    }
+
+    void finalize_impl(std::shared_ptr<IRemoteContext> context) override {
+        if (!is_set_by_user(low_level_property)) {
+            m_low_level_property.value = m_high_level_property.value;
+        }
+    }
+
+    void apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) override {
+        apply_rt_info_property(high_level_property, rt_info);
+    }
+
+    using ov::PluginConfig::get_option_ptr;
+    using ov::PluginConfig::is_set_by_user;
+};
+
+TEST(plugin_config, can_create_empty_config) {
+    ASSERT_NO_THROW(
+        EmptyTestConfig cfg;
+        ASSERT_EQ(cfg.get_supported_properties().size(), 0);
+    );
+}
+
+TEST(plugin_config, can_create_not_empty_config) {
+    ASSERT_NO_THROW(
+        NotEmptyTestConfig cfg;
+        ASSERT_EQ(cfg.get_supported_properties().size(), 6);
+    );
+}
+
+TEST(plugin_config, can_set_get_property) {
+    NotEmptyTestConfig cfg;
+    ASSERT_NO_THROW(cfg.get_bool_property());
+    ASSERT_EQ(cfg.get_bool_property(), true);
+    ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
+    ASSERT_EQ(cfg.get_bool_property(), false);
+}
+
+TEST(plugin_config, throw_for_unsupported_property) {
+    NotEmptyTestConfig cfg;
+    ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name()));
+    ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f)));
+}
+
+TEST(plugin_config, can_direct_access_to_properties) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property());
+    ASSERT_NO_THROW(cfg.set_property(int_property(1)));
+    ASSERT_EQ(cfg.m_int_property.value, -1);  // user property doesn't impact member value until finalize() is called
+
+    cfg.m_int_property.value = 2;
+    ASSERT_EQ(cfg.get_int_property(), 1);  // still 1 as a user property was set previously
+}
+
+TEST(plugin_config, finalization_updates_member) {
+    NotEmptyTestConfig cfg;
+    ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
+    ASSERT_EQ(cfg.m_bool_property.value, true);  // user property doesn't impact member value until finalize() is called
+
+    cfg.finalize(nullptr, {});
+
+    ASSERT_EQ(cfg.m_bool_property.value, false);  // now the value has changed
+}
+
+TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
+    NotEmptyTestConfig cfg;
+
+    ASSERT_EQ(cfg.get_bool_property(), true);     // default value
+    ASSERT_EQ(cfg.m_bool_property.value, true);   // default value
+
+    cfg.m_bool_property.value = false;            // update member directly
+    ASSERT_EQ(cfg.get_bool_property(), false);    // OK, return the class member value as no user property was set
+
+    ASSERT_NO_THROW(cfg.set_property(bool_property(true)));
+    ASSERT_TRUE(cfg.is_set_by_user(bool_property));
+    ASSERT_EQ(cfg.get_bool_property(), true);     // now user property value is returned
+    ASSERT_EQ(cfg.m_bool_property.value, false);  // but class member is not updated
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value);  // equal after finalization
+    ASSERT_FALSE(cfg.is_set_by_user(bool_property));  // and user property is cleared
+}
+
+TEST(plugin_config, finalization_updates_dependant_properties) {
+    NotEmptyTestConfig cfg;
+
+    cfg.set_property(high_level_property("value1"));
+    ASSERT_TRUE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");
+    ASSERT_FALSE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+}
+
+TEST(plugin_config, can_set_property_from_rt_info) {
+    NotEmptyTestConfig cfg;
+
+    RTMap rt_info = {
+        {high_level_property.name(), "value1"},
+        {int_property.name(), 10}  // int_property is not applied from rt info
+    };
+
+    // default values
+    ASSERT_EQ(cfg.m_high_level_property.value, "");
+    ASSERT_EQ(cfg.m_low_level_property.value, "");
+    ASSERT_EQ(cfg.m_int_property.value, -1);
+
+    cfg.finalize(nullptr, rt_info);
+
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");  // dependant is updated too
+    ASSERT_EQ(cfg.m_int_property.value, -1);  // still default
+}
+
+TEST(plugin_config, can_copy_config) {
+    NotEmptyTestConfig cfg1;
+
+    cfg1.m_high_level_property.value = "value1";
+    cfg1.m_low_level_property.value = "value2";
+    cfg1.m_int_property.value = 1;
+    cfg1.set_property(bool_property(false));
+
+    NotEmptyTestConfig cfg2 = cfg1;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+    ASSERT_EQ(cfg2.get_bool_property(), false);  // ensure user properties are copied too
+
+    // check that cfg1 modification doesn't impact a copy
+    cfg1.set_property(high_level_property("value3"));
+    cfg1.m_int_property.value = 3;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+}
+
+TEST(plugin_config, set_property_throw_for_non_release_options) {
+    NotEmptyTestConfig cfg;
+    ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10)));
+    ASSERT_ANY_THROW(cfg.set_property(debug_property(10)));
+}
+
+TEST(plugin_config, visibility_is_correct) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL);
+    ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG);
+    ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE);
+}
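The CPU plugin changes below follow the same pattern: direct struct field reads (e.g. cfg.exclusiveAsyncRequests) become macro-generated typed getters. A hedged sketch of the call-site change; `CpuConfigLike` is a stub standing in for the real ov::intel_cpu::Config:

// Stub illustrating the call-site change below; not the real plugin class.
struct CpuConfigLike {
    bool m_exclusive_async_requests_value = false;  // macro-generated member in the real class
    const bool& get_exclusive_async_requests() const {
        return m_exclusive_async_requests_value;
    }
};

void setup_executor(const CpuConfigLike& cfg) {
    // old: if (cfg.exclusiveAsyncRequests) { ... }
    if (cfg.get_exclusive_async_requests()) {
        // mux all infer requests into a single "CPU" executor queue
    }
}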
diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt
index aa6ce49a051e00..0ecd4435507da4 100644
--- a/src/plugins/intel_cpu/CMakeLists.txt
+++ b/src/plugins/intel_cpu/CMakeLists.txt
@@ -373,11 +373,11 @@ if(BUILD_SHARED_LIBS)
         PRIVATE
             $
             $
-            $
             $
             $
         PUBLIC
             ${CMAKE_CURRENT_SOURCE_DIR}/src
+            $
             $)

     target_include_directories(${TARGET_NAME}_obj SYSTEM PUBLIC $)
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index f81c7dbbced99d..ab7091d2988fd8 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -53,14 +53,15 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
       m_cfg{cfg},
       m_name{model->get_name()},
       m_loaded_from_cache(loaded_from_cache),
-      m_sub_memory_manager(sub_memory_manager) {
+      m_sub_memory_manager(sub_memory_manager),
+      m_model_name(model->get_friendly_name()) {
     m_mutex = std::make_shared<std::mutex>();
     const auto& core = m_plugin->get_core();
     if (!core)
         OPENVINO_THROW("Unable to get API version. Core is unavailable");

     IStreamsExecutor::Config executor_confg;
-    if (cfg.exclusiveAsyncRequests) {
+    if (cfg.get_exclusive_async_requests()) {
         // special case when all InferRequests are muxed into a single queue
         m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU");
     } else {
@@ -156,7 +157,7 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
         GraphContext::Ptr ctx;
         {
             std::lock_guard<std::mutex> lock{*m_mutex.get()};
-            auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
+            auto isQuantizedFlag = (m_cfg.get_lp_transforms_mode()) &&
                 ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);

            ctx = std::make_shared<GraphContext>(m_cfg,
@@ -219,16 +220,16 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         return m_loaded_from_cache;
     }

-    Config engConfig = get_graph()._graph.getConfig();
-    auto option = engConfig._config.find(name);
-    if (option != engConfig._config.end()) {
-        return option->second;
-    }
+    // Config engConfig = get_graph()._graph.getConfig();
+    // auto option = engConfig._config.find(name);
+    // if (option != engConfig._config.end()) {
+    //     return option->second;
+    // }

-    // @todo Can't we just use local copy (_cfg) instead?
-    auto graphLock = get_graph();
-    const auto& graph = graphLock._graph;
-    const auto& config = graph.getConfig();
+    // // @todo Can't we just use local copy (_cfg) instead?
+    // auto graphLock = get_graph();
+    // const auto& graph = graphLock._graph;
+    // const auto& config = graph.getConfig();

     auto RO_property = [](const std::string& propertyName) {
         return ov::PropertyName(propertyName, ov::PropertyMutability::RO);
     };
@@ -266,78 +267,22 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     }

     if (name == ov::model_name) {
-        // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
-        const std::string modelName = graph.dump()->get_friendly_name();
-        return decltype(ov::model_name)::value_type(modelName);
+        return decltype(ov::model_name)::value_type {m_model_name};
+    } else if (name == ov::loaded_from_cache) {
+        return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
     } else if (name == ov::optimal_number_of_infer_requests) {
-        const auto streams = config.streamExecutorConfig.get_streams();
+        const auto streams = m_cfg.streamExecutorConfig.get_streams();
         return decltype(ov::optimal_number_of_infer_requests)::value_type(
            streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
-    } else if (name == ov::num_streams) {
-        const auto streams = config.streamExecutorConfig.get_streams();
-        return decltype(ov::num_streams)::value_type(
-            streams);  // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
-        OPENVINO_SUPPRESS_DEPRECATED_START
-    } else if (name == ov::affinity) {
-        const auto affinity = config.threadBindingType;
-        switch (affinity) {
-        case IStreamsExecutor::ThreadBindingType::NONE:
-            return ov::Affinity::NONE;
-        case IStreamsExecutor::ThreadBindingType::CORES:
-            return ov::Affinity::CORE;
-        case IStreamsExecutor::ThreadBindingType::NUMA:
-            return ov::Affinity::NUMA;
-        case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE:
-            return ov::Affinity::HYBRID_AWARE;
-        }
-        return ov::Affinity::NONE;
-        OPENVINO_SUPPRESS_DEPRECATED_END
-    } else if (name == ov::inference_num_threads) {
-        const auto num_threads = config.streamExecutorConfig.get_threads();
-        return decltype(ov::inference_num_threads)::value_type(num_threads);
-    } else if (name == ov::enable_profiling.name()) {
-        const bool perfCount = config.collectPerfCounters;
-        return decltype(ov::enable_profiling)::value_type(perfCount);
-    } else if (name == ov::hint::inference_precision) {
-        return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
-    } else if (name == ov::hint::performance_mode) {
-        return decltype(ov::hint::performance_mode)::value_type(config.hintPerfMode);
-    } else if (name == ov::log::level) {
-        return decltype(ov::log::level)::value_type(config.logLevel);
-    } else if (name == ov::hint::enable_cpu_pinning.name()) {
-        const bool use_pin = config.enableCpuPinning;
-        return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
-    } else if (name == ov::hint::scheduling_core_type) {
-        const auto stream_mode = config.schedulingCoreType;
-        return stream_mode;
-    } else if (name == ov::hint::model_distribution_policy) {
-        const auto& distribution_policy = config.modelDistributionPolicy;
-        return distribution_policy;
-    } else if (name == ov::hint::enable_hyper_threading.name()) {
-        const bool use_ht = config.enableHyperThreading;
-        return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
-    } else if (name == ov::hint::execution_mode) {
-        return config.executionMode;
-    } else if (name == ov::hint::num_requests) {
-        return decltype(ov::hint::num_requests)::value_type(config.hintNumRequests);
     } else if (name == ov::execution_devices) {
         return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
-    } else if (name == ov::intel_cpu::denormals_optimization) {
-        return decltype(ov::intel_cpu::denormals_optimization)::value_type(config.denormalsOptMode ==
-                                                                           Config::DenormalsOptMode::DO_On);
-    } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
-        return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type(
-            config.fcSparseWeiDecompressionRate);
-    } else if (name == ov::hint::dynamic_quantization_group_size) {
-        return decltype(ov::hint::dynamic_quantization_group_size)::value_type(config.fcDynamicQuantizationGroupSize);
-    } else if (name == ov::hint::kv_cache_precision) {
-        return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
     }
-    OPENVINO_THROW("Unsupported property: ", name);
+
+    return m_cfg.get_property(name);
 }

 void CompiledModel::export_model(std::ostream& modelStream) const {
-    ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
+    ModelSerializer serializer(modelStream, m_cfg.get_cache_encryption_callbacks().encrypt);
     serializer << m_model;
 }
diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index f7d2903b0526cf..e5251853fd95cb 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -98,6 +98,8 @@ class CompiledModel : public ov::ICompiledModel {
     std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
     std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
     bool m_has_sub_compiled_models = false;
+
+    std::string m_model_name;
 };

 // This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 7d1ee05897e81d..b2d6658bc598b1 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -21,456 +21,456 @@
 namespace ov {
 namespace intel_cpu {

-using namespace ov::threading;
-using namespace dnnl::impl::cpu::x64;
+// using namespace ov::threading;
+// using namespace dnnl::impl::cpu::x64;

-Config::Config() {
-    // this is default mode
-#if defined(__APPLE__) || defined(_WIN32)
-    threadBindingType = IStreamsExecutor::NONE;
-#else
-    threadBindingType = IStreamsExecutor::CORES;
-#endif
+// Config::Config() {
+//     // this is default mode
+// #if defined(__APPLE__) || defined(_WIN32)
+//     threadBindingType = IStreamsExecutor::NONE;
+// #else
+//     threadBindingType = IStreamsExecutor::CORES;
+// #endif

-// for the TBB code-path, additional configuration depending on the OS and CPU types
-#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
-#    if defined(__APPLE__) || defined(_WIN32)
-    // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default
-    auto numaNodes = get_available_numa_nodes();
-    if (numaNodes.size() > 1) {
-        threadBindingType = IStreamsExecutor::NUMA;
-    } else {
-        threadBindingType = IStreamsExecutor::NONE;
-    }
-#    endif
+// // for the TBB code-path, additional configuration depending on the OS and CPU types
+// #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
+// #    if defined(__APPLE__) || defined(_WIN32)
+//     // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default
+//     auto numaNodes = get_available_numa_nodes();
+//     if (numaNodes.size() > 1) {
+//         threadBindingType = IStreamsExecutor::NUMA;
+//     } else {
+//         threadBindingType = IStreamsExecutor::NONE;
+//     }
+// #    endif

-    if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) {
-        threadBindingType = IStreamsExecutor::HYBRID_AWARE;
-    }
-#endif
-    CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
+//     if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) {
+//         threadBindingType = IStreamsExecutor::HYBRID_AWARE;
+//     }
+// #endif
+//     CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());

-    updateProperties();
-}
+//     updateProperties();
+// }

-#ifdef CPU_DEBUG_CAPS
-/**
- * Debug capabilities configuration has more priority than common one
- * Some of the debug capabilities also require to enable some of common
- * configuration properties
- */
-void Config::applyDebugCapsProperties() {
-    // always enable perf counters for verbose, performance summary and average counters
-    if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) {
-        collectPerfCounters = true;
-    }
-}
-#endif
+// #ifdef CPU_DEBUG_CAPS
+// /**
+// * Debug capabilities configuration has more priority than common one
+// * Some of the debug capabilities also require to
enable some of common +// * configuration properties +// */ +// void Config::applyDebugCapsProperties() { +// // always enable perf counters for verbose, performance summary and average counters +// if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) { +// collectPerfCounters = true; +// } +// } +// #endif -void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { - const auto streamExecutorConfigKeys = - streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); - for (const auto& kvp : prop) { - const auto& key = kvp.first; - const auto& val = kvp.second; - if (streamExecutorConfigKeys.end() != - std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { - streamExecutorConfig.set_property(key, val.as()); - streams = streamExecutorConfig.get_streams(); - threads = streamExecutorConfig.get_threads(); - threadsPerStream = streamExecutorConfig.get_threads_per_stream(); - if (key == ov::num_streams.name()) { - ov::Any value = val.as(); - auto streams_value = value.as(); - if (streams_value == ov::streams::NUMA) { - modelDistributionPolicy = {}; - hintPerfMode = ov::hint::PerformanceMode::LATENCY; - changedHintPerfMode = true; - } else if (streams_value == ov::streams::AUTO) { - hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; - changedHintPerfMode = true; - } else { - streamsChanged = true; - } - } - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (key == ov::affinity.name()) { - try { - changedCpuPinning = true; - ov::Affinity affinity = val.as(); -#if defined(__APPLE__) - enableCpuPinning = false; - threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE - : IStreamsExecutor::ThreadBindingType::NUMA; -#else - enableCpuPinning = - (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; - switch (affinity) { - case ov::Affinity::NONE: - threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; - break; - case ov::Affinity::CORE: { - threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; - } break; - case ov::Affinity::NUMA: - threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; - break; - case ov::Affinity::HYBRID_AWARE: - threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; - break; - default: - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } -#endif - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (key == ov::hint::performance_mode.name()) { - try { - hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); - } - } else if (key == ov::log::level.name()) { - try { - logLevel = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - key, - ". 
Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); - } - } else if (key == ov::hint::num_requests.name()) { - try { - ov::Any value = val.as(); - int val_i = value.as(); - if (val_i < 0) - OPENVINO_THROW("invalid value."); - hintNumRequests = static_cast(val_i); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::num_requests.name(), - ". Expected only > 0."); - } - } else if (key == ov::hint::enable_cpu_pinning.name()) { - try { - enableCpuPinning = val.as(); - changedCpuPinning = true; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::enable_cpu_pinning.name(), - ". Expected only true/false."); - } - } else if (key == ov::hint::scheduling_core_type.name()) { - try { - schedulingCoreType = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::scheduling_core_type.name(), - ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); - } - } else if (key == ov::hint::model_distribution_policy.name()) { - auto error_info = [&]() { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::model_distribution_policy.name(), - ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); - }; +// void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { +// const auto streamExecutorConfigKeys = +// streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); +// for (const auto& kvp : prop) { +// const auto& key = kvp.first; +// const auto& val = kvp.second; +// if (streamExecutorConfigKeys.end() != +// std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { +// streamExecutorConfig.set_property(key, val.as()); +// streams = streamExecutorConfig.get_streams(); +// threads = streamExecutorConfig.get_threads(); +// threadsPerStream = streamExecutorConfig.get_threads_per_stream(); +// if (key == ov::num_streams.name()) { +// ov::Any value = val.as(); +// auto streams_value = value.as(); +// if (streams_value == ov::streams::NUMA) { +// modelDistributionPolicy = {}; +// hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// changedHintPerfMode = true; +// } else if (streams_value == ov::streams::AUTO) { +// hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; +// changedHintPerfMode = true; +// } else { +// streamsChanged = true; +// } +// } +// OPENVINO_SUPPRESS_DEPRECATED_START +// } else if (key == ov::affinity.name()) { +// try { +// changedCpuPinning = true; +// ov::Affinity affinity = val.as(); +// #if defined(__APPLE__) +// enableCpuPinning = false; +// threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE +// : IStreamsExecutor::ThreadBindingType::NUMA; +// #else +// enableCpuPinning = +// (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? 
true : false; +// switch (affinity) { +// case ov::Affinity::NONE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; +// break; +// case ov::Affinity::CORE: { +// threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; +// } break; +// case ov::Affinity::NUMA: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; +// break; +// case ov::Affinity::HYBRID_AWARE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; +// break; +// default: +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// #endif +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// OPENVINO_SUPPRESS_DEPRECATED_END +// // } else if (key == ov::hint::performance_mode.name()) { +// // try { +// // hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // key, +// // ". Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); +// // } +// // } else if (key == ov::log::level.name()) { +// // try { +// // logLevel = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // key, +// // ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); +// // } +// // } else if (key == ov::hint::num_requests.name()) { +// // try { +// // ov::Any value = val.as(); +// // int val_i = value.as(); +// // if (val_i < 0) +// // OPENVINO_THROW("invalid value."); +// // hintNumRequests = static_cast(val_i); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::num_requests.name(), +// // ". Expected only > 0."); +// // } +// // } else if (key == ov::hint::enable_cpu_pinning.name()) { +// // try { +// // enableCpuPinning = val.as(); +// // changedCpuPinning = true; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::enable_cpu_pinning.name(), +// // ". Expected only true/false."); +// // } +// // } else if (key == ov::hint::scheduling_core_type.name()) { +// // try { +// // schedulingCoreType = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::scheduling_core_type.name(), +// // ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); +// // } +// // } else if (key == ov::hint::model_distribution_policy.name()) { +// // auto error_info = [&]() { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::model_distribution_policy.name(), +// // ". 
CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); +// // }; - try { - for (auto& row : val.as>()) { - if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { - error_info(); - } - } - modelDistributionPolicy = val.as>(); - } catch (ov::Exception&) { - error_info(); - } - } else if (key == ov::hint::enable_hyper_threading.name()) { - try { - enableHyperThreading = val.as(); - changedHyperThreading = true; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::enable_hyper_threading.name(), - ". Expected only true/false."); - } - } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { - float val_f = 0.0f; - try { - val_f = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", - ov::intel_cpu::sparse_weights_decompression_rate.name(), - ". Expected only float numbers"); - } - if (val_f < 0.f || val_f > 1.f) { - OPENVINO_THROW("Wrong value for property key ", - ov::intel_cpu::sparse_weights_decompression_rate.name(), - ". Sparse rate must be in range [0.0f,1.0f]"); - } else { - fcSparseWeiDecompressionRate = val_f; - } - } else if (key == ov::hint::dynamic_quantization_group_size.name()) { - try { - fcDynamicQuantizationGroupSizeSetExplicitly = true; - fcDynamicQuantizationGroupSize = val.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", - ov::hint::dynamic_quantization_group_size.name(), - ". Expected only unsinged integer numbers"); - } - } else if (key == ov::enable_profiling.name()) { - try { - collectPerfCounters = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::enable_profiling.name(), - ". Expected only true/false"); - } - } else if (key == ov::internal::exclusive_async_requests.name()) { - try { - exclusiveAsyncRequests = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::internal::exclusive_async_requests.name(), - ". Expected only true/false"); - } - } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { - try { - lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - key, - ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); - } - } else if (key == ov::device::id.name()) { - device_id = val.as(); - if (!device_id.empty()) { - OPENVINO_THROW("CPU plugin supports only '' as device id"); - } - } else if (key == ov::hint::inference_precision.name()) { - try { - auto const prec = val.as(); - inferencePrecisionSetExplicitly = true; - if (prec == ov::element::bf16) { - if (hasHardwareSupport(ov::element::bf16)) { - inferencePrecision = ov::element::bf16; - } - } else if (prec == ov::element::f16) { - if (hasHardwareSupport(ov::element::f16)) { - inferencePrecision = ov::element::f16; - } - } else if (one_of(prec, element::f32, element::undefined)) { - inferencePrecision = prec; - } else { - OPENVINO_THROW("invalid value"); - } - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::hint::inference_precision.name(), - ". 
Supported values: bf16, f16, f32, undefined"); - } - } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { - int val_i = -1; - try { - ov::Any value = val.as(); - val_i = value.as(); - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::cpu_runtime_cache_capacity.name(), - ". Expected only integer numbers"); - } - // any negative value will be treated - // as zero that means disabling the cache - rtCacheCapacity = std::max(val_i, 0); - } else if (ov::intel_cpu::denormals_optimization.name() == key) { - try { - denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; - } catch (ov::Exception&) { - denormalsOptMode = DenormalsOptMode::DO_Keep; - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::denormals_optimization.name(), - ". Expected only true/false"); - } - } else if (key == ov::intel_cpu::snippets_mode.name()) { - try { - auto const mode = val.as(); - if (mode == ov::intel_cpu::SnippetsMode::ENABLE) - snippetsMode = SnippetsMode::Enable; - else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) - snippetsMode = SnippetsMode::IgnoreCallback; - else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) - snippetsMode = SnippetsMode::Disable; - else - OPENVINO_THROW("invalid value"); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::intel_cpu::snippets_mode.name(), - ". Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); - } - } else if (key == ov::hint::execution_mode.name()) { - try { - executionMode = val.as(); - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - ov::hint::execution_mode.name(), - ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); - } - } else if (key == ov::hint::kv_cache_precision.name()) { - try { - kvCachePrecisionSetExplicitly = true; - auto const prec = val.as(); - if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { - kvCachePrecision = prec; - } else { - OPENVINO_THROW("invalid value"); - } - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - " for property key ", - ov::hint::kv_cache_precision.name(), - ". 
Supported values: u8, bf16, f16, f32"); - } - } else if (key == ov::cache_encryption_callbacks.name()) { - try { - auto encryption_callbacks = val.as(); - cacheEncrypt = encryption_callbacks.encrypt; - cacheDecrypt = encryption_callbacks.decrypt; - } catch (ov::Exception&) { - OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); - } - } else if (key == ov::internal::caching_with_mmap.name()) { - } else { - OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); - } - } - // apply execution mode after all the params are handled to prevent possible conflicts - // when both execution_mode and inference_precision are specified - if (!inferencePrecisionSetExplicitly) { - if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { - inferencePrecision = ov::element::f32; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) - if (hasHardwareSupport(ov::element::f16)) { - inferencePrecision = ov::element::f16; - } -#endif - if (mayiuse(avx512_core_bf16)) - inferencePrecision = ov::element::bf16; - } else { - inferencePrecision = ov::element::undefined; - } - } - // enable ACL fast math in PERFORMANCE mode -#if defined(OV_CPU_WITH_ACL) - if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { - aclFastMath = true; - } -#endif - // disable dynamic quantization and kv quantization for best accuracy - if (executionMode == ov::hint::ExecutionMode::ACCURACY) { - if (!fcDynamicQuantizationGroupSizeSetExplicitly) { - fcDynamicQuantizationGroupSize = 0; - } - if (!kvCachePrecisionSetExplicitly) { - kvCachePrecision = ov::element::f32; - } - } +// // try { +// // for (auto& row : val.as>()) { +// // if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { +// // error_info(); +// // } +// // } +// // modelDistributionPolicy = val.as>(); +// // } catch (ov::Exception&) { +// // error_info(); +// // } +// // } else if (key == ov::hint::enable_hyper_threading.name()) { +// // try { +// // enableHyperThreading = val.as(); +// // changedHyperThreading = true; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::enable_hyper_threading.name(), +// // ". Expected only true/false."); +// // } +// // } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { +// // float val_f = 0.0f; +// // try { +// // val_f = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::intel_cpu::sparse_weights_decompression_rate.name(), +// // ". Expected only float numbers"); +// // } +// // if (val_f < 0.f || val_f > 1.f) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::intel_cpu::sparse_weights_decompression_rate.name(), +// // ". Sparse rate must be in range [0.0f,1.0f]"); +// // } else { +// // fcSparseWeiDecompressionRate = val_f; +// // } +// // } else if (key == ov::hint::dynamic_quantization_group_size.name()) { +// // try { +// // fcDynamicQuantizationGroupSizeSetExplicitly = true; +// // fcDynamicQuantizationGroupSize = val.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", +// // ov::hint::dynamic_quantization_group_size.name(), +// // ". 
Expected only unsinged integer numbers"); +// // } +// // } else if (key == ov::enable_profiling.name()) { +// // try { +// // collectPerfCounters = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::enable_profiling.name(), +// // ". Expected only true/false"); +// // } +// // } else if (key == ov::internal::exclusive_async_requests.name()) { +// // try { +// // exclusiveAsyncRequests = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::internal::exclusive_async_requests.name(), +// // ". Expected only true/false"); +// // } +// // } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { +// // try { +// // lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // key, +// // ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); +// // } +// // } else if (key == ov::device::id.name()) { +// // device_id = val.as(); +// // if (!device_id.empty()) { +// // OPENVINO_THROW("CPU plugin supports only '' as device id"); +// // } +// // } else if (key == ov::hint::inference_precision.name()) { +// // try { +// // auto const prec = val.as(); +// // inferencePrecisionSetExplicitly = true; +// // if (prec == ov::element::bf16) { +// // if (hasHardwareSupport(ov::element::bf16)) { +// // inferencePrecision = ov::element::bf16; +// // } +// // } else if (prec == ov::element::f16) { +// // if (hasHardwareSupport(ov::element::f16)) { +// // inferencePrecision = ov::element::f16; +// // } +// // } else if (one_of(prec, element::f32, element::undefined)) { +// // inferencePrecision = prec; +// // } else { +// // OPENVINO_THROW("invalid value"); +// // } +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::hint::inference_precision.name(), +// // ". Supported values: bf16, f16, f32, undefined"); +// // } +// // } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { +// // int val_i = -1; +// // try { +// // ov::Any value = val.as(); +// // val_i = value.as(); +// // } catch (const ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::cpu_runtime_cache_capacity.name(), +// // ". Expected only integer numbers"); +// // } +// // // any negative value will be treated +// // // as zero that means disabling the cache +// // rtCacheCapacity = std::max(val_i, 0); +// // } else if (ov::intel_cpu::denormals_optimization.name() == key) { +// // try { +// // denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; +// // } catch (ov::Exception&) { +// // denormalsOptMode = DenormalsOptMode::DO_Keep; +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::denormals_optimization.name(), +// // ". 
Expected only true/false"); +// // } +// // } else if (key == ov::intel_cpu::snippets_mode.name()) { +// // try { +// // auto const mode = val.as(); +// // if (mode == ov::intel_cpu::SnippetsMode::ENABLE) +// // snippetsMode = SnippetsMode::Enable; +// // else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) +// // snippetsMode = SnippetsMode::IgnoreCallback; +// // else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) +// // snippetsMode = SnippetsMode::Disable; +// // else +// // OPENVINO_THROW("invalid value"); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::intel_cpu::snippets_mode.name(), +// // ". Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); +// // } +// // } else if (key == ov::hint::execution_mode.name()) { +// // try { +// // executionMode = val.as(); +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // "for property key ", +// // ov::hint::execution_mode.name(), +// // ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); +// // } +// // } else if (key == ov::hint::kv_cache_precision.name()) { +// // try { +// // kvCachePrecisionSetExplicitly = true; +// // auto const prec = val.as(); +// // if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { +// // kvCachePrecision = prec; +// // } else { +// // OPENVINO_THROW("invalid value"); +// // } +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value ", +// // val.as(), +// // " for property key ", +// // ov::hint::kv_cache_precision.name(), +// // ". Supported values: u8, bf16, f16, f32"); +// // } +// // } else if (key == ov::cache_encryption_callbacks.name()) { +// // try { +// // auto encryption_callbacks = val.as(); +// // cacheEncrypt = encryption_callbacks.encrypt; +// // cacheDecrypt = encryption_callbacks.decrypt; +// // } catch (ov::Exception&) { +// // OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); +// // } +// // } else if (key == ov::internal::caching_with_mmap.name()) { +// } else { +// OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); +// } +// } +// // // apply execution mode after all the params are handled to prevent possible conflicts +// // // when both execution_mode and inference_precision are specified +// // if (!inferencePrecisionSetExplicitly) { +// // if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// // inferencePrecision = ov::element::f32; +// // #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +// // if (hasHardwareSupport(ov::element::f16)) { +// // inferencePrecision = ov::element::f16; +// // } +// // #endif +// // if (mayiuse(avx512_core_bf16)) +// // inferencePrecision = ov::element::bf16; +// // } else { +// // inferencePrecision = ov::element::undefined; +// // } +// // } +// // enable ACL fast math in PERFORMANCE mode +// // #if defined(OV_CPU_WITH_ACL) +// // if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// // aclFastMath = true; +// // } +// // #endif +// // disable dynamic quantization and kv quantization for best accuracy +// if (executionMode == ov::hint::ExecutionMode::ACCURACY) { +// if (!fcDynamicQuantizationGroupSizeSetExplicitly) { +// fcDynamicQuantizationGroupSize = 0; +// } +// if (!kvCachePrecisionSetExplicitly) { +// kvCachePrecision = ov::element::f32; +// } +// } - if (!prop.empty()) - _config.clear(); +// if (!prop.empty()) +// 
_config.clear(); - if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams - streams = 1; - streamsChanged = true; - } +// if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams +// streams = 1; +// streamsChanged = true; +// } -#if defined(OV_CPU_WITH_SHL) - // TODO: multi-stream execution is unsafe when SHL is used: - // The library uses global static variables as flags and counters. - streams = 1; - streamsChanged = true; -#endif +// #if defined(OV_CPU_WITH_SHL) +// // TODO: multi-stream execution is unsafe when SHL is used: +// // The library uses global static variables as flags and counters. +// streams = 1; +// streamsChanged = true; +// #endif - this->modelType = modelType; +// this->modelType = modelType; - CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); - updateProperties(); -} +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); +// updateProperties(); +// } -void Config::updateProperties() { - if (!_config.empty()) - return; +// void Config::updateProperties() { +// if (!_config.empty()) +// return; - if (collectPerfCounters == true) - _config.insert({ov::enable_profiling.name(), "YES"}); - else - _config.insert({ov::enable_profiling.name(), "NO"}); - if (exclusiveAsyncRequests == true) - _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); - else - _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); +// if (collectPerfCounters == true) +// _config.insert({ov::enable_profiling.name(), "YES"}); +// else +// _config.insert({ov::enable_profiling.name(), "NO"}); +// if (exclusiveAsyncRequests == true) +// _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); +// else +// _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); - _config.insert({ov::device::id.name(), device_id}); +// _config.insert({ov::device::id.name(), device_id}); - _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); - _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); -} +// _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); +// _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); +// } -void Config::applyRtInfo(const std::shared_ptr& model) { - // if user sets explicitly, it will be higher priority than rt_info - if (!kvCachePrecisionSetExplicitly && - model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { - this->kvCachePrecision = - model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); - } - if (!fcDynamicQuantizationGroupSizeSetExplicitly && - model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { - this->fcDynamicQuantizationGroupSize = - model->get_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()}); - } -} +// void Config::applyRtInfo(const std::shared_ptr& model) { +// // if user sets explicitly, it will be higher priority than rt_info +// if (!kvCachePrecisionSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { +// this->kvCachePrecision = +// model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); +// } +// if (!fcDynamicQuantizationGroupSizeSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { +// this->fcDynamicQuantizationGroupSize = +// model->get_rt_info({"runtime_options", 
ov::hint::dynamic_quantization_group_size.name()}); +// } +// } } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 1aa08f4412f0b3..22808caff1f8f5 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -4,118 +4,121 @@ #pragma once -#include -#include -#include - -#include "internal_properties.hpp" -#include "openvino/core/type/element_type.hpp" -#include "openvino/runtime/properties.hpp" -#include "openvino/runtime/threading/istreams_executor.hpp" -#include "openvino/util/common_util.hpp" -#include "utils/debug_caps_config.h" +// #include +// #include +// #include + +// #include "internal_properties.hpp" +// #include "openvino/core/type/element_type.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "openvino/runtime/threading/istreams_executor.hpp" +// #include "openvino/util/common_util.hpp" +// #include "utils/debug_caps_config.h" +#include "config_new.hpp" namespace ov { namespace intel_cpu { -struct Config { - Config(); - - enum LPTransformsMode { - Off, - On, - }; - - enum DenormalsOptMode { - DO_Keep, - DO_Off, - DO_On, - }; - - enum SnippetsMode { - Enable, - IgnoreCallback, - Disable, - }; - - enum class ModelType { CNN, LLM, Unknown }; - - bool collectPerfCounters = false; - bool exclusiveAsyncRequests = false; - SnippetsMode snippetsMode = SnippetsMode::Enable; - std::string dumpToDot = {}; - std::string device_id = {}; - float fcSparseWeiDecompressionRate = 1.0f; - uint64_t fcDynamicQuantizationGroupSize = 32; - bool fcDynamicQuantizationGroupSizeSetExplicitly = false; - bool kvCachePrecisionSetExplicitly = false; -#if defined(OV_CPU_WITH_ACL) - bool aclFastMath = false; -#endif -#if defined(OPENVINO_ARCH_X86_64) - ov::element::Type kvCachePrecision = ov::element::u8; - size_t rtCacheCapacity = 5000ul; -#else - ov::element::Type kvCachePrecision = ov::element::f16; - // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives - size_t rtCacheCapacity = 0ul; -#endif - ov::threading::IStreamsExecutor::Config streamExecutorConfig; - int streams = 1; - bool streamsChanged = false; - int threads = 0; - int threadsPerStream = 0; - ov::threading::IStreamsExecutor::ThreadBindingType threadBindingType = - ov::threading::IStreamsExecutor::ThreadBindingType::NONE; - ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY; - std::vector> streamsRankTable; - bool changedHintPerfMode = false; - ov::log::Level logLevel = ov::log::Level::NO; - uint32_t hintNumRequests = 0; - bool enableCpuPinning = true; - bool changedCpuPinning = false; - ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - std::set modelDistributionPolicy = {}; - int streamsRankLevel = 1; - int numSubStreams = 0; - bool enableNodeSplit = false; - bool enableHyperThreading = true; - bool changedHyperThreading = false; -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - LPTransformsMode lpTransformsMode = LPTransformsMode::On; -#else - // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
- LPTransformsMode lpTransformsMode = LPTransformsMode::Off; -#endif - // default inference precision - ov::element::Type inferencePrecision = ov::element::f32; - bool inferencePrecisionSetExplicitly = false; - ov::hint::ExecutionMode executionMode = ov::hint::ExecutionMode::PERFORMANCE; - - DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; - - // The denormals-are-zeros flag was introduced in the Pentium 4 and Intel Xeon processor - // In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag (bit 6) - // is reserved. - bool DAZOn = false; - - void readProperties(const ov::AnyMap& config, const ModelType modelType = ModelType::Unknown); - - void updateProperties(); - - void applyRtInfo(const std::shared_ptr& model); - - std::map _config; - - int modelPreferThreads = -1; - ModelType modelType = ModelType::Unknown; - std::function cacheEncrypt; - std::function cacheDecrypt; - -#ifdef CPU_DEBUG_CAPS - DebugCapsConfig debugCaps; - void applyDebugCapsProperties(); -#endif -}; + +using Config = ExecutionConfig; +// struct Config { +// Config(); + +// enum LPTransformsMode { +// Off, +// On, +// }; + +// enum DenormalsOptMode { +// DO_Keep, +// DO_Off, +// DO_On, +// }; + +// enum SnippetsMode { +// Enable, +// IgnoreCallback, +// Disable, +// }; + +// enum class ModelType { CNN, LLM, Unknown }; + +// bool collectPerfCounters = false; +// bool exclusiveAsyncRequests = false; +// SnippetsMode snippetsMode = SnippetsMode::Enable; +// std::string dumpToDot = {}; +// std::string device_id = {}; +// float fcSparseWeiDecompressionRate = 1.0f; +// uint64_t fcDynamicQuantizationGroupSize = 32; +// bool fcDynamicQuantizationGroupSizeSetExplicitly = false; +// bool kvCachePrecisionSetExplicitly = false; +// #if defined(OV_CPU_WITH_ACL) +// bool aclFastMath = false; +// #endif +// #if defined(OPENVINO_ARCH_X86_64) +// ov::element::Type kvCachePrecision = ov::element::u8; +// size_t rtCacheCapacity = 5000ul; +// #else +// ov::element::Type kvCachePrecision = ov::element::f16; +// // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives +// size_t rtCacheCapacity = 0ul; +// #endif +// ov::threading::IStreamsExecutor::Config streamExecutorConfig; +// int streams = 1; +// bool streamsChanged = false; +// int threads = 0; +// int threadsPerStream = 0; +// ov::threading::IStreamsExecutor::ThreadBindingType threadBindingType = +// ov::threading::IStreamsExecutor::ThreadBindingType::NONE; +// ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// std::vector> streamsRankTable; +// bool changedHintPerfMode = false; +// ov::log::Level logLevel = ov::log::Level::NO; +// uint32_t hintNumRequests = 0; +// bool enableCpuPinning = true; +// bool changedCpuPinning = false; +// ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; +// std::set modelDistributionPolicy = {}; +// int streamsRankLevel = 1; +// int numSubStreams = 0; +// bool enableNodeSplit = false; +// bool enableHyperThreading = true; +// bool changedHyperThreading = false; +// #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +// LPTransformsMode lpTransformsMode = LPTransformsMode::On; +// #else +// // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
+// LPTransformsMode lpTransformsMode = LPTransformsMode::Off; +// #endif +// // default inference precision +// ov::element::Type inferencePrecision = ov::element::f32; +// bool inferencePrecisionSetExplicitly = false; +// ov::hint::ExecutionMode executionMode = ov::hint::ExecutionMode::PERFORMANCE; + +// DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; + +// // The denormals-are-zeros flag was introduced in the Pentium 4 and Intel Xeon processor +// // In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag (bit 6) +// // is reserved. +// bool DAZOn = false; + +// void readProperties(const ov::AnyMap& config, const ModelType modelType = ModelType::Unknown); + +// void updateProperties(); + +// void applyRtInfo(const std::shared_ptr& model); + +// std::map _config; + +// int modelPreferThreads = -1; +// ModelType modelType = ModelType::Unknown; +// std::function cacheEncrypt; +// std::function cacheDecrypt; + +// #ifdef CPU_DEBUG_CAPS +// DebugCapsConfig debugCaps; +// void applyDebugCapsProperties(); +// #endif +// }; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/config_new.cpp b/src/plugins/intel_cpu/src/config_new.cpp new file mode 100644 index 00000000000000..d205631eb00df1 --- /dev/null +++ b/src/plugins/intel_cpu/src/config_new.cpp @@ -0,0 +1,659 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "config_new.hpp" +#include "utils/precision_support.h" +#include "utils/codec_xor.hpp" + +#include "cpu/x64/cpu_isa_traits.hpp" + +// #include +// #include +// #include + +// #include "cpu/x64/cpu_isa_traits.hpp" +// #include "openvino/core/parallel.hpp" +// #include "openvino/core/type/element_type_traits.hpp" +// #include "openvino/runtime/intel_cpu/properties.hpp" +// #include "openvino/runtime/internal_properties.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "utils/cpu_utils.hpp" +// #include "utils/debug_capabilities.h" +// #include "utils/precision_support.h" + +namespace ov { +namespace intel_cpu { + +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+
+    set_default_values();
+}
+
+ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() {
+    m_user_properties = other.m_user_properties;
+    m_is_finalized = false; // copy is not automatically finalized
+    for (const auto& kv : other.m_options_map) {
+        m_options_map.at(kv.first)->set_any(kv.second->get_any());
+    }
+
+    streamExecutorConfig = other.streamExecutorConfig;
+    modelPreferThreads = other.modelPreferThreads;
+    modelType = other.modelType;
+    DAZOn = other.DAZOn;
+    streamsRankTable = other.streamsRankTable;
+    streamsRankLevel = other.streamsRankLevel;
+    numSubStreams = other.numSubStreams;
+    enableNodeSplit = other.enableNodeSplit;
+}
+
+ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) {
+    m_user_properties = other.m_user_properties;
+    m_is_finalized = false; // copy is not automatically finalized
+    for (const auto& kv : other.m_options_map) {
+        m_options_map.at(kv.first)->set_any(kv.second->get_any());
+    }
+
+    streamExecutorConfig = other.streamExecutorConfig;
+    modelPreferThreads = other.modelPreferThreads;
+    modelType = other.modelType;
+    DAZOn = other.DAZOn;
+    streamsRankTable = other.streamsRankTable;
+    streamsRankLevel = other.streamsRankLevel;
+    numSubStreams = other.numSubStreams;
+    enableNodeSplit = other.enableNodeSplit;
+
+    return *this;
+}
+
+const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const {
+    static ov::PluginConfig::OptionsDesc help_map {
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+    };
+    return help_map;
+}
+
+void ExecutionConfig::apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
+    apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
+}
+
+void ExecutionConfig::finalize_impl(std::shared_ptr<IRemoteContext> context) {
+    // const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info();
+    apply_hints();
+
+    if (get_exclusive_async_requests()) {
+        m_num_streams = 1;
+    }
+
+#if defined(OV_CPU_WITH_SHL)
+    // TODO: multi-stream execution is unsafe when SHL is used:
+    // The library uses global static variables as flags and counters.
+    m_num_streams = 1;
+#endif
+
+    if (!m_cache_encryption_callbacks.value.encrypt || !m_cache_encryption_callbacks.value.decrypt) {
+        m_cache_encryption_callbacks.value.encrypt = codec_xor_str;
+        m_cache_encryption_callbacks.value.decrypt = codec_xor_str;
+    }
+}
+
+void ExecutionConfig::set_default_values() {
+#if defined(OPENVINO_ARCH_X86_64)
+    m_cpu_runtime_cache_capacity = 5000ul;
+#else
+    // TODO: Executor cache may lead to incorrect behavior on oneDNN ACL primitives
+    // TODO: Verify on RISC-V platforms
+    m_cpu_runtime_cache_capacity = 0ul;
+#endif
+
+#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
+    m_lp_transforms_mode = true;
+#else
+    // Currently INT8 mode is not optimized on ARM / RISC-V or other non-x86 platforms; fall back to FP32 mode.
+ m_lp_transforms_mode = false; +#endif +} + +void ExecutionConfig::apply_hints() { + apply_execution_hints(); + apply_performance_hints(); +} + +void ExecutionConfig::apply_execution_hints() { + if (get_execution_mode() == ov::hint::ExecutionMode::PERFORMANCE) { + if (!is_set_by_user(ov::hint::inference_precision)) { +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + m_inference_precision = ov::element::f16; +#else + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) + m_inference_precision = ov::element::bf16; +#endif + } + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size)) { + m_dynamic_quantization_group_size = 32; + } + if (!is_set_by_user(ov::hint::kv_cache_precision)) { +#if defined(OPENVINO_ARCH_X86_64) + m_kv_cache_precision = ov::element::u8; +#else + m_kv_cache_precision = ov::element::f16; +#endif + } +#if defined(OV_CPU_WITH_ACL) + if (!is_set_by_user(ov::intel_cpu::acl_fast_math)) { + m_acl_fast_math = true; + } +#endif + } + + if (get_execution_mode() == ov::hint::ExecutionMode::ACCURACY) { + if (!is_set_by_user(ov::hint::inference_precision)) { + m_inference_precision = ov::element::undefined; + } + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size)) { + m_dynamic_quantization_group_size = 0; + } + if (!is_set_by_user(ov::hint::kv_cache_precision)) { + m_kv_cache_precision = ov::element::f32; + } +#if defined(OV_CPU_WITH_ACL) + if (!is_set_by_user(ov::intel_cpu::acl_fast_math)) { + m_acl_fast_math = false; + } +#endif + } + + if (!hasHardwareSupport(m_inference_precision)) { + m_inference_precision = ov::element::f32; + } +} + +void ExecutionConfig::apply_performance_hints() { + // if (is_set_by_user(ov::hint::performance_mode)) { + // const auto mode = get_property(ov::hint::performance_mode); + // if (!is_set_by_user(ov::num_streams)) { + // if (mode == ov::hint::PerformanceMode::LATENCY) { + // set_property(ov::num_streams(1)); + // } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + // set_property(ov::num_streams(ov::streams::AUTO)); + // } + // } + // } + + // if (get_property(ov::num_streams) == ov::streams::AUTO) { + // int32_t n_streams = std::max(info.num_ccs, 2); + // set_property(ov::num_streams(n_streams)); + // } + + // if (get_property(ov::internal::exclusive_async_requests)) { + // set_property(ov::num_streams(1)); + // } + + // // Allow kernels reuse only for single-stream scenarios + // if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { + // if (get_property(ov::num_streams) != 1) { + // set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + // } + // } +} + + +// using namespace ov::threading; +// using namespace dnnl::impl::cpu::x64; + +// Config::Config() { +// // this is default mode +// #if defined(__APPLE__) || defined(_WIN32) +// threadBindingType = IStreamsExecutor::NONE; +// #else +// threadBindingType = IStreamsExecutor::CORES; +// #endif + +// // for the TBB code-path, additional configuration depending on the OS and CPU types +// #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) +// # if defined(__APPLE__) || defined(_WIN32) +// // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default +// auto numaNodes = get_available_numa_nodes(); +// if (numaNodes.size() > 1) { +// threadBindingType = IStreamsExecutor::NUMA; +// } else { +// threadBindingType = IStreamsExecutor::NONE; +// } +// # endif + +// if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { +// threadBindingType = IStreamsExecutor::HYBRID_AWARE; 
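// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: a sketch of the intended lifecycle
// of the new config. It assumes the ov::PluginConfig base exposes a public
// finalize() entry point that dispatches to the finalize_impl()/apply_hints()
// logic shown above; variable names are illustrative only.
//
//     ExecutionConfig cfg;
//     cfg.set_property(ov::AnyMap{{ov::hint::execution_mode.name(),
//                                  ov::hint::ExecutionMode::ACCURACY}});
//     cfg.finalize(nullptr);  // CPU needs no remote context
//     // apply_execution_hints(): kv_cache_precision was not set by the user,
//     // so ACCURACY mode downgrades it to f32 for best accuracy:
//     OPENVINO_ASSERT(cfg.get_kv_cache_precision() == ov::element::f32);
// ---------------------------------------------------------------------------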
+// } +// #endif +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); + +// updateProperties(); +// } + +// #ifdef CPU_DEBUG_CAPS +// /** +// * Debug capabilities configuration has more priority than common one +// * Some of the debug capabilities also require to enable some of common +// * configuration properties +// */ +// void Config::applyDebugCapsProperties() { +// // always enable perf counters for verbose, performance summary and average counters +// if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) { +// collectPerfCounters = true; +// } +// } +// #endif + +// void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { +// const auto streamExecutorConfigKeys = +// streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); +// for (const auto& kvp : prop) { +// const auto& key = kvp.first; +// const auto& val = kvp.second; +// if (streamExecutorConfigKeys.end() != +// std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { +// streamExecutorConfig.set_property(key, val.as()); +// streams = streamExecutorConfig.get_streams(); +// threads = streamExecutorConfig.get_threads(); +// threadsPerStream = streamExecutorConfig.get_threads_per_stream(); +// if (key == ov::num_streams.name()) { +// ov::Any value = val.as(); +// auto streams_value = value.as(); +// if (streams_value == ov::streams::NUMA) { +// modelDistributionPolicy = {}; +// hintPerfMode = ov::hint::PerformanceMode::LATENCY; +// changedHintPerfMode = true; +// } else if (streams_value == ov::streams::AUTO) { +// hintPerfMode = ov::hint::PerformanceMode::THROUGHPUT; +// changedHintPerfMode = true; +// } else { +// streamsChanged = true; +// } +// } +// OPENVINO_SUPPRESS_DEPRECATED_START +// } else if (key == ov::affinity.name()) { +// try { +// changedCpuPinning = true; +// ov::Affinity affinity = val.as(); +// #if defined(__APPLE__) +// enableCpuPinning = false; +// threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE +// : IStreamsExecutor::ThreadBindingType::NUMA; +// #else +// enableCpuPinning = +// (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; +// switch (affinity) { +// case ov::Affinity::NONE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; +// break; +// case ov::Affinity::CORE: { +// threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; +// } break; +// case ov::Affinity::NUMA: +// threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; +// break; +// case ov::Affinity::HYBRID_AWARE: +// threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; +// break; +// default: +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// #endif +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); +// } +// OPENVINO_SUPPRESS_DEPRECATED_END +// } else if (key == ov::hint::performance_mode.name()) { +// try { +// hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// key, +// ". 
Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT."); +// } +// } else if (key == ov::log::level.name()) { +// try { +// logLevel = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// key, +// ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE."); +// } +// } else if (key == ov::hint::num_requests.name()) { +// try { +// ov::Any value = val.as(); +// int val_i = value.as(); +// if (val_i < 0) +// OPENVINO_THROW("invalid value."); +// hintNumRequests = static_cast(val_i); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::num_requests.name(), +// ". Expected only > 0."); +// } +// } else if (key == ov::hint::enable_cpu_pinning.name()) { +// try { +// enableCpuPinning = val.as(); +// changedCpuPinning = true; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::enable_cpu_pinning.name(), +// ". Expected only true/false."); +// } +// } else if (key == ov::hint::scheduling_core_type.name()) { +// try { +// schedulingCoreType = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::scheduling_core_type.name(), +// ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); +// } +// } else if (key == ov::hint::model_distribution_policy.name()) { +// auto error_info = [&]() { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::model_distribution_policy.name(), +// ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); +// }; + +// try { +// for (auto& row : val.as>()) { +// if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { +// error_info(); +// } +// } +// modelDistributionPolicy = val.as>(); +// } catch (ov::Exception&) { +// error_info(); +// } +// } else if (key == ov::hint::enable_hyper_threading.name()) { +// try { +// enableHyperThreading = val.as(); +// changedHyperThreading = true; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::enable_hyper_threading.name(), +// ". Expected only true/false."); +// } +// } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { +// float val_f = 0.0f; +// try { +// val_f = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::intel_cpu::sparse_weights_decompression_rate.name(), +// ". Expected only float numbers"); +// } +// if (val_f < 0.f || val_f > 1.f) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::intel_cpu::sparse_weights_decompression_rate.name(), +// ". Sparse rate must be in range [0.0f,1.0f]"); +// } else { +// fcSparseWeiDecompressionRate = val_f; +// } +// } else if (key == ov::hint::dynamic_quantization_group_size.name()) { +// try { +// fcDynamicQuantizationGroupSizeSetExplicitly = true; +// fcDynamicQuantizationGroupSize = val.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", +// ov::hint::dynamic_quantization_group_size.name(), +// ". 
Expected only unsinged integer numbers"); +// } +// } else if (key == ov::enable_profiling.name()) { +// try { +// collectPerfCounters = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::enable_profiling.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::internal::exclusive_async_requests.name()) { +// try { +// exclusiveAsyncRequests = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::internal::exclusive_async_requests.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { +// try { +// lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// key, +// ". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off"); +// } +// } else if (key == ov::device::id.name()) { +// device_id = val.as(); +// if (!device_id.empty()) { +// OPENVINO_THROW("CPU plugin supports only '' as device id"); +// } +// } else if (key == ov::hint::inference_precision.name()) { +// try { +// auto const prec = val.as(); +// inferencePrecisionSetExplicitly = true; +// if (prec == ov::element::bf16) { +// if (hasHardwareSupport(ov::element::bf16)) { +// inferencePrecision = ov::element::bf16; +// } +// } else if (prec == ov::element::f16) { +// if (hasHardwareSupport(ov::element::f16)) { +// inferencePrecision = ov::element::f16; +// } +// } else if (one_of(prec, element::f32, element::undefined)) { +// inferencePrecision = prec; +// } else { +// OPENVINO_THROW("invalid value"); +// } +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::hint::inference_precision.name(), +// ". Supported values: bf16, f16, f32, undefined"); +// } +// } else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) { +// int val_i = -1; +// try { +// ov::Any value = val.as(); +// val_i = value.as(); +// } catch (const ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::cpu_runtime_cache_capacity.name(), +// ". Expected only integer numbers"); +// } +// // any negative value will be treated +// // as zero that means disabling the cache +// rtCacheCapacity = std::max(val_i, 0); +// } else if (ov::intel_cpu::denormals_optimization.name() == key) { +// try { +// denormalsOptMode = val.as() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off; +// } catch (ov::Exception&) { +// denormalsOptMode = DenormalsOptMode::DO_Keep; +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::denormals_optimization.name(), +// ". Expected only true/false"); +// } +// } else if (key == ov::intel_cpu::snippets_mode.name()) { +// try { +// auto const mode = val.as(); +// if (mode == ov::intel_cpu::SnippetsMode::ENABLE) +// snippetsMode = SnippetsMode::Enable; +// else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) +// snippetsMode = SnippetsMode::IgnoreCallback; +// else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) +// snippetsMode = SnippetsMode::Disable; +// else +// OPENVINO_THROW("invalid value"); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::intel_cpu::snippets_mode.name(), +// ". 
Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK"); +// } +// } else if (key == ov::hint::execution_mode.name()) { +// try { +// executionMode = val.as(); +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// "for property key ", +// ov::hint::execution_mode.name(), +// ". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY"); +// } +// } else if (key == ov::hint::kv_cache_precision.name()) { +// try { +// kvCachePrecisionSetExplicitly = true; +// auto const prec = val.as(); +// if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) { +// kvCachePrecision = prec; +// } else { +// OPENVINO_THROW("invalid value"); +// } +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value ", +// val.as(), +// " for property key ", +// ov::hint::kv_cache_precision.name(), +// ". Supported values: u8, bf16, f16, f32"); +// } +// } else if (key == ov::cache_encryption_callbacks.name()) { +// try { +// auto encryption_callbacks = val.as(); +// cacheEncrypt = encryption_callbacks.encrypt; +// cacheDecrypt = encryption_callbacks.decrypt; +// } catch (ov::Exception&) { +// OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); +// } +// } else if (key == ov::internal::caching_with_mmap.name()) { +// } else { +// OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); +// } +// } +// // apply execution mode after all the params are handled to prevent possible conflicts +// // when both execution_mode and inference_precision are specified +// if (!inferencePrecisionSetExplicitly) { +// if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// inferencePrecision = ov::element::f32; +// #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +// if (hasHardwareSupport(ov::element::f16)) { +// inferencePrecision = ov::element::f16; +// } +// #endif +// if (mayiuse(avx512_core_bf16)) +// inferencePrecision = ov::element::bf16; +// } else { +// inferencePrecision = ov::element::undefined; +// } +// } +// // enable ACL fast math in PERFORMANCE mode +// #if defined(OV_CPU_WITH_ACL) +// if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { +// aclFastMath = true; +// } +// #endif +// // disable dynamic quantization and kv quantization for best accuracy +// if (executionMode == ov::hint::ExecutionMode::ACCURACY) { +// if (!fcDynamicQuantizationGroupSizeSetExplicitly) { +// fcDynamicQuantizationGroupSize = 0; +// } +// if (!kvCachePrecisionSetExplicitly) { +// kvCachePrecision = ov::element::f32; +// } +// } + +// if (!prop.empty()) +// _config.clear(); + +// if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams +// streams = 1; +// streamsChanged = true; +// } + +// #if defined(OV_CPU_WITH_SHL) +// // TODO: multi-stream execution is unsafe when SHL is used: +// // The library uses global static variables as flags and counters. 
+// streams = 1; +// streamsChanged = true; +// #endif + +// this->modelType = modelType; + +// CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); +// updateProperties(); +// } + +// void Config::updateProperties() { +// if (!_config.empty()) +// return; + +// if (collectPerfCounters == true) +// _config.insert({ov::enable_profiling.name(), "YES"}); +// else +// _config.insert({ov::enable_profiling.name(), "NO"}); +// if (exclusiveAsyncRequests == true) +// _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); +// else +// _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); + +// _config.insert({ov::device::id.name(), device_id}); + +// _config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)}); +// _config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)}); +// } + +// void Config::applyRtInfo(const std::shared_ptr& model) { +// // if user sets explicitly, it will be higher priority than rt_info +// if (!kvCachePrecisionSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) { +// this->kvCachePrecision = +// model->get_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()}); +// } +// if (!fcDynamicQuantizationGroupSizeSetExplicitly && +// model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) { +// this->fcDynamicQuantizationGroupSize = +// model->get_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()}); +// } +// } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/config_new.hpp b/src/plugins/intel_cpu/src/config_new.hpp new file mode 100644 index 00000000000000..ff6e717fd5cf32 --- /dev/null +++ b/src/plugins/intel_cpu/src/config_new.hpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +// #include +// #include +// #include + +// #include "openvino/core/type/element_type.hpp" +// #include "openvino/runtime/properties.hpp" +// #include "openvino/runtime/threading/istreams_executor.hpp" +// #include "openvino/util/common_util.hpp" +// #include "utils/debug_caps_config.h" + +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/intel_cpu/properties.hpp" +#include "internal_properties.hpp" + +#include "utils/general_utils.h" + +namespace ov { +namespace intel_cpu { + +struct ExecutionConfig : public ov::PluginConfig { + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); + + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+
+    void finalize_impl(std::shared_ptr<IRemoteContext> context) override;
+    void apply_rt_info(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) override;
+
+    // TODO: move to GraphContext
+    ov::threading::IStreamsExecutor::Config streamExecutorConfig;
+    // TODO: make local for streams calculation logic
+    int modelPreferThreads = -1;
+    // TODO: move to GraphContext
+    enum class ModelType { CNN, LLM, Unknown };
+    ModelType modelType = ModelType::Unknown;
+
+    bool DAZOn = false;
+
+    std::vector<std::vector<int>> streamsRankTable;
+    int streamsRankLevel = 1;
+    int numSubStreams = 0;
+    bool enableNodeSplit = false;
+
+private:
+    void set_default_values();
+    void apply_user_properties();
+    void apply_hints();
+    void apply_execution_hints();
+    void apply_performance_hints();
+    const ov::PluginConfig::OptionsDesc& get_options_desc() const override;
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #include "options.inl"
+    #undef OV_CONFIG_OPTION
+};
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
index 3af6a52d5f3342..3b34b7000d5a66 100644
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -672,39 +672,48 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
                                                    Config& config,
                                                    std::vector<std::vector<int>>& proc_type_table,
                                                    int preferred_nthreads_per_stream) {
-    int model_prefer_threads = preferred_nthreads_per_stream;
-    proc_type_table = apply_scheduling_core_type(config.schedulingCoreType, proc_type_table);
+    auto threadsPerStream = config.streamExecutorConfig.get_threads_per_stream();
 
-    proc_type_table = apply_hyper_threading(config.enableHyperThreading,
-                                            config.changedHyperThreading,
-                                            ov::util::to_string(config.hintPerfMode),
+    int model_prefer_threads = preferred_nthreads_per_stream;
+    auto core_type = config.get_scheduling_core_type();
+    proc_type_table = apply_scheduling_core_type(core_type, proc_type_table);
+    config.set_property(ov::hint::scheduling_core_type(core_type));
+
+    auto enable_hyper_threading = config.get_enable_hyper_threading();
+    proc_type_table = apply_hyper_threading(enable_hyper_threading,
+                                            config.is_set_by_user(ov::hint::enable_hyper_threading),
+                                            ov::util::to_string(config.get_performance_mode()),
                                             proc_type_table);
+    config.set_property(ov::hint::enable_hyper_threading(enable_hyper_threading));
+
     if (-1 == preferred_nthreads_per_stream) {
         model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, model, config);
     }
 
-    auto streams_info_table = get_streams_info_table(config.streams,
-                                                     config.streamsChanged,
-                                                     config.threads,
-                                                     config.hintNumRequests,
+    auto streams_info_table = get_streams_info_table(config.get_num_streams(),
+                                                     config.is_set_by_user(ov::num_streams),
+                                                     config.get_inference_num_threads(),
+                                                     config.get_num_requests(),
                                                      model_prefer_threads,
                                                      input_current_socket_id,
-                                                     ov::util::to_string(config.hintPerfMode),
-                                                     config.modelDistributionPolicy,
+                                                     ov::util::to_string(config.get_performance_mode()),
+                                                     config.get_model_distribution_policy(),
                                                      proc_type_table);
     // streams_info_table = {{1, 1, 56, 1, 1}, {-1, 1, 28, 1, 1}, {-1, 1, 28, 0, 0}};
-    if (config.modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) !=
-        config.modelDistributionPolicy.end()) {
+    auto modelDistributionPolicy = config.get_model_distribution_policy();
+    if
(modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) { config.streamsRankTable = get_streams_rank_table(streams_info_table, config.streamsRankLevel, config.numSubStreams); } + auto enable_cpu_pinning = config.get_enable_cpu_pinning(); auto cpu_pinning = - get_cpu_pinning(config.enableCpuPinning, config.changedCpuPinning, proc_type_table, streams_info_table); + get_cpu_pinning(enable_cpu_pinning, config.is_set_by_user(ov::hint::enable_cpu_pinning), proc_type_table, streams_info_table); + config.set_property(ov::hint::enable_cpu_pinning(cpu_pinning)); config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor", - config.streams, - config.threadsPerStream, + config.get_num_streams(), + threadsPerStream, ov::hint::SchedulingCoreType::ANY_CORE, false, cpu_pinning, diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index aab78a4d5f15bd..79ce3b61d3685c 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -55,8 +55,8 @@ namespace ov { namespace intel_cpu { Graph::~Graph() { - CPU_DEBUG_CAP_ENABLE(summary_perf(*this)); - CPU_DEBUG_CAP_ENABLE(average_counters(*this)); + CPU_DEBUG_CAP_ENABLE(dump_summary_perf(*this)); + CPU_DEBUG_CAP_ENABLE(dump_average_counters(*this)); } template @@ -1409,9 +1409,9 @@ class UpdateNodes : public UpdateNodesBase { /* group all the profiling macros into a single one * to avoid cluttering a core logic */ #define VERBOSE_PERF_DUMP_ITT_DEBUG_LOG(ittScope, node, config) \ - VERBOSE(node, config.debugCaps.verbose); \ - PERF(node, config.collectPerfCounters); \ - DUMP(node, config.debugCaps, infer_count); \ + VERBOSE(node, config.get_verbose()); \ + PERF(node, config.get_enable_profiling()); \ + DUMP(node, config, infer_count); \ OV_ITT_SCOPED_TASK(ittScope, node->profiling.execute); \ DEBUG_LOG(*node); @@ -1452,7 +1452,7 @@ static int GetNumaNodeId(const GraphContext::CPtr& context) { int numaNodeId = -1; #if defined(__x86_64__) && defined(__linux__) if ((context->getCPUStreamExecutor()) && - (context->getConfig().hintPerfMode == ov::hint::PerformanceMode::LATENCY)) { + (context->getConfig().get_performance_mode() == ov::hint::PerformanceMode::LATENCY)) { numaNodeId = context->getCPUStreamExecutor()->get_numa_node_id(); } #endif @@ -1788,7 +1788,7 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo void Graph::EnforceInferencePrecision() { CPU_DEBUG_CAP_ENABLE(EnforceInferPrcDebug inferPrecDebug); - const auto inferPrec = getConfig().inferencePrecision; + const auto inferPrec = getConfig().get_inference_precision(); if (one_of(inferPrec, element::f32, element::undefined, ov::element::f16)) return; // nothing to do, only precision reduction is currently allowed diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index 462cdab2a9b5c0..2707f92f05c9db 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -22,7 +22,7 @@ GraphContext::GraphContext(const Config& config, subMemoryManager(sub_memory_manager), memoryStatesRegister(std::make_shared()), networkMemoryControl(std::make_shared()) { - rtParamsCache = std::make_shared(config.rtCacheCapacity); + rtParamsCache = std::make_shared(config.get_cpu_runtime_cache_capacity()); // primitive/executors can be shared across sub-stream // but scratch pad cannot be shared. 
numNumaNodes = 1; diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index ffd58fdb162899..ef516e47ba0bcc 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -218,7 +218,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph) { #ifdef CPU_DEBUG_CAPS void serialize(const Graph& graph) { - const std::string& path = graph.getConfig().debugCaps.execGraphPath; + const std::string& path = graph.getConfig().get_exec_graph_path(); if (path.empty()) return; @@ -264,11 +264,11 @@ void serializeToCout(const Graph& graph) { } } -void summary_perf(const Graph& graph) { +void dump_summary_perf(const Graph& graph) { if (!graph.getGraphContext()) { return; } - const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf; + const std::string& summaryPerf = graph.getConfig().get_summary_perf(); if (summaryPerf.empty() || !std::stoi(summaryPerf)) return; @@ -349,7 +349,7 @@ void summary_perf(const Graph& graph) { } } -void average_counters(const Graph& graph) { +void dump_average_counters(const Graph& graph) { /** * @todo improve logic for a graph with inner graphs: * - collect counters only for the outer graph if full path is specified @@ -358,7 +358,7 @@ void average_counters(const Graph& graph) { * For example: 0_MyModel.csv */ - const std::string& path = graph.getConfig().debugCaps.averageCountersPath; + const std::string& path = graph.getConfig().get_average_counters(); if (path.empty()) return; diff --git a/src/plugins/intel_cpu/src/graph_dumper.h b/src/plugins/intel_cpu/src/graph_dumper.h index 40af2fd44c61e6..dd97346aad1792 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.h +++ b/src/plugins/intel_cpu/src/graph_dumper.h @@ -14,8 +14,8 @@ namespace intel_cpu { std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph); #ifdef CPU_DEBUG_CAPS void serialize(const Graph& graph); -void summary_perf(const Graph& graph); -void average_counters(const Graph& graph); +void dump_summary_perf(const Graph& graph); +void dump_average_counters(const Graph& graph); #endif // CPU_DEBUG_CAPS } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp index 7f3feb59779bd4..12e8c84b79af59 100644 --- a/src/plugins/intel_cpu/src/internal_properties.hpp +++ b/src/plugins/intel_cpu/src/internal_properties.hpp @@ -68,5 +68,27 @@ inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) { */ static constexpr Property snippets_mode{"SNIPPETS_MODE"}; +/** + * @brief Enables fast-math mode for ARM Compute Library (ACL). 
+ */
+static constexpr Property<bool> acl_fast_math{"ACL_FAST_MATH"};
+
+enum class BlobDumpFormat {
+    BIN,
+    TEXT,
+};
+
+static constexpr Property<std::string> verbose{"VERBOSE"};
+static constexpr Property<std::string> exec_graph_path{"EXEC_GRAPH_PATH"};
+static constexpr Property<std::string> average_counters{"AVERAGE_COUNTERS"};
+static constexpr Property<std::string> blob_dump_dir{"BLOB_DUMP_DIR"};
+static constexpr Property<BlobDumpFormat> blob_dump_format{"BLOB_DUMP_FORMAT"};
+static constexpr Property<std::string> blob_dump_node_exec_id{"BLOB_DUMP_NODE_EXEC_ID"};
+static constexpr Property<std::string> blob_dump_node_ports{"BLOB_DUMP_NODE_PORTS"};
+static constexpr Property<std::string> blob_dump_node_type{"BLOB_DUMP_NODE_TYPE"};
+static constexpr Property<std::string> blob_dump_node_name{"BLOB_DUMP_NODE_NAME"};
+static constexpr Property<std::string> summary_perf{"SUMMARY_PERF"};
+
 } // namespace intel_cpu
 } // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 2df6c0ae7522cc..46977f9a063904 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -470,8 +470,8 @@ void FullyConnected::initSupportedPrimitiveDescriptors() {
     attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
                                                         getOriginalInputPrecisionAtPort(DATA),
-                                                        context->getConfig().fcSparseWeiDecompressionRate);
-    attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize;
+                                                        context->getConfig().get_sparse_weights_decompression_rate());
+    attrs.dynamicQuantizationGroupSize = context->getConfig().get_dynamic_quantization_group_size();
     attrs.modelType = context->getConfig().modelType;
 
     postOps = getPostOps(fusedWith);
diff --git a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
index b475a602c3cd1a..a3c97a5d0862d1 100644
--- a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
+++ b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp
@@ -503,7 +503,7 @@ LLMMLP::LLMMLP(const std::shared_ptr& op, const GraphContext::CPtr con
     : Node(op, context, NgraphShapeInferFactory(op)) {
     std::string errorMessage;
     const auto& config = context->getConfig();
-    if (!isSupportedOperation(op, errorMessage, config.fcDynamicQuantizationGroupSize)) {
+    if (!isSupportedOperation(op, errorMessage, config.get_dynamic_quantization_group_size())) {
         OPENVINO_THROW("CPU: " + errorMessage);
     }
     const auto node_mlp = std::dynamic_pointer_cast(op);
diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp
index 5a0bd7a1e3dff1..703260b9c4ed26 100644
--- a/src/plugins/intel_cpu/src/nodes/memory.cpp
+++ b/src/plugins/intel_cpu/src/nodes/memory.cpp
@@ -13,6 +13,7 @@
 #include "scaled_attn.h"
 #include "shape_inference/shape_inference_pass_through.hpp"
 #include "utils/general_utils.h"
+#include "openvino/util/common_util.hpp"
 
 using namespace dnnl;
diff --git a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
index ce7bfae07591d6..f07456aeffe389 100644
--- a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
+++ b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
@@ -343,7 +343,7 @@ QKVProjection::QKVProjection(const std::shared_ptr& op, const GraphCon
     if (concurrency == 0)
         concurrency = parallel_get_max_threads();
 
-    if (!isSupportedOperation(op, errorMessage, concurrency, config.fcDynamicQuantizationGroupSize)) {
+    if (!isSupportedOperation(op, errorMessage, concurrency, config.get_dynamic_quantization_group_size())) {
         OPENVINO_THROW("CPU: " + errorMessage);
     }
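// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the node diffs above and below all
// follow the same migration pattern, replacing direct Config field reads with
// the generated getters on the finalized config. A minimal sketch, with the
// surrounding node body elided:
//
//     const auto& config = context->getConfig();
//     const uint64_t dq_group_size = config.get_dynamic_quantization_group_size();
//     const float sparse_rate = config.get_sparse_weights_decompression_rate();
//     const ov::element::Type kv_prec = config.get_kv_cache_precision();
// ---------------------------------------------------------------------------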
const auto node = std::dynamic_pointer_cast(op); diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index 7fe3fc8dc5045d..f1eaad0c53be2a 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -1834,7 +1834,7 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M ov::element::Type ScaledDotProductAttention::getKVCachePrecision() { ov::element::Type kvcache_precision; auto rtPrecision = getRuntimePrecision(); - auto kvCachePrecisionHint = context->getConfig().kvCachePrecision; + auto kvCachePrecisionHint = context->getConfig().get_kv_cache_precision(); bool enableKVCacheFP16 = m_config.config.fuse_concat && mayiuse(cpu_isa_t::avx2) && rtPrecision != ov::element::bf16 && kvCachePrecisionHint == ov::element::f16; kvcache_precision = enableKVCacheFP16 ? ov::element::f16 : rtPrecision; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 2b0c7b55fb043d..f2ff8ad03ff5a1 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -460,9 +460,9 @@ void Subgraph::initSupportedPrimitiveDescriptors() { const auto originalInputPrecision = getOriginalInputPrecisionAtPort(i); const auto precision = ((originalInputPrecision == ov::element::f32) && - one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) && + one_of(context->getConfig().get_inference_precision(), ov::element::bf16, ov::element::f16) && subgraph_attrs->snippet->has_domain_sensitive_ops()) - ? context->getConfig().inferencePrecision + ? context->getConfig().get_inference_precision() : originalInputPrecision; if (supportedPrecisions.count(precision) == 0) OPENVINO_THROW("Subgraph node with name `", getName(), "` doesn't support ", precision, " precision."); @@ -654,7 +654,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::snippets::pass::Canonicalization, ov::snippets::pass::AnalyzeBroadcastableInputs, broadcastable_inputs); - if (one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) && + if (one_of(context->getConfig().get_inference_precision(), ov::element::bf16, ov::element::f16) && subgraph_attrs->snippet->has_domain_sensitive_ops()) { // enforce BF16 precisions to supported operations // MatMul has to be decomposed to Brgemm operations before enforcement @@ -664,7 +664,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::snippets::pass::MatMulToBrgemm, pass::EnforcePrecision, element::f32, - context->getConfig().inferencePrecision); + context->getConfig().get_inference_precision()); } SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, ov::snippets::pass::PropagatePrecision, diff --git a/src/plugins/intel_cpu/src/options.inl b/src/plugins/intel_cpu/src/options.inl new file mode 100644 index 00000000000000..a4028b2269d378 --- /dev/null +++ b/src/plugins/intel_cpu/src/options.inl @@ -0,0 +1,76 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description + +// OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") // ??? 
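+// Illustrative sketch (not a real entry in this table): going by the OV_CONFIG_* macro
+// machinery in plugin_config.hpp, an entry such as
+//     OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "...")
+// is assumed to expand into a ConfigOption member plus a typed getter, roughly:
+//     ConfigOption<uint32_t> m_num_requests{0};
+//     const uint32_t& get_num_requests() const;  // user-set value until finalize(), stored value afterwards
+// The optional validator lambda, when present, is presumably applied when the property is
+// set, so unsupported values are rejected before they reach the plugin.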
+OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f32, + [](ov::element::Type val) { return one_of(val, ov::element::f32, ov::element::bf16, ov::element::f16, ov::element::undefined); }, + "Model floating-point inference precision. Supported values: f32, bf16, f16, undefined") +OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, + "High-level hint that defines target model inference mode. It may impact the number of streams, auto-batching, etc.") +OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, + "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") + +OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Defines the number of streams to be used for inference") +OV_CONFIG_RELEASE_OPTION(ov, inference_num_threads, 0, "Defines the maximum number of threads that can be used for inference tasks") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Provides the number of requests populated by the application") // TODO: Do we need validator? +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") + +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls whether CPU threads are pinned to the cores") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_hyper_threading, false, "Defines whether hyper-threading is used during inference") +OV_CONFIG_RELEASE_OPTION(ov::hint, scheduling_core_type, ov::hint::SchedulingCoreType::ANY_CORE, "Defines the CPU core type that can be used during inference") + +OV_CONFIG_RELEASE_OPTION(ov::hint, model_distribution_policy, {}, + [](std::set val) { + for (auto& row : val) { + if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) + return false; + } + return true; + }, + "Defines model distribution policy for inference with multiple sockets/devices. Supported values: TENSOR_PARALLEL") + + +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, + "Defines the group size for dynamic quantization optimization. Supported values: [0, UINT64_MAX], where 0 disables DQ and UINT64_MAX means per-tensor DQ") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::f32, + [](ov::element::Type val) { return one_of(val, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8); }, + "Specifies the precision for kv cache compression. Supported values: f32, bf16, f16, u8") + +OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, cpu_runtime_cache_capacity, 0, + [](int val) { return val >= 0; }, + "Defines how many records can be stored in the CPU runtime parameters cache per CPU runtime parameter type per stream. Supported values: [0, INT32_MAX]") +OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, sparse_weights_decompression_rate, 1.0f, + [](float val) { return val >= 0.f && val <= 1.f; }, + "Defines the threshold for sparse weights decompression feature activation (1.0 means the feature is disabled). 
Supported values: [0.0f, 1.0f]") +OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, denormals_optimization, nullptr, + "DefineS whether to perform denormals optimization (enables FTZ and DAZ)") + +OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, lp_transforms_mode, false, "Defines if Low Precision Trasformations (LPT) should be enabled") +OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, snippets_mode, SnippetsMode::ENABLE, + [](ov::intel_cpu::SnippetsMode val) { return one_of(val, ov::intel_cpu::SnippetsMode::ENABLE, ov::intel_cpu::SnippetsMode::DISABLE, ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK); }, + "Defines Snippets code generation pipeline mode. Supported values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK") + +OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_RELEASE_OPTION(ov::log, level, ov::log::Level::NO, "Defines Log level") +OV_CONFIG_RELEASE_OPTION(ov::device, id, "", "ID of the current device") + +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::internal, caching_with_mmap, true, "Defines if caching with mmap should be enabled") + +#if defined(OV_CPU_WITH_ACL) + OV_CONFIG_RELEASE_OPTION(ov::intel_cpu, acl_fast_math, false, "Defines if ACL fast-math mode should be enabled") +#endif + +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, verbose, "0", "Enables logging for debugging purposes.") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, exec_graph_path, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, average_counters, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_dir, "cpu_dump", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_format, BlobDumpFormat::TEXT, "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_exec_id, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_ports, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_type, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, blob_dump_node_name, "", "") +OV_CONFIG_DEBUG_OPTION(ov::intel_cpu, summary_perf, "", "") \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index db55c728df725e..1359138b53fcc2 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -135,6 +135,7 @@ Plugin::Plugin() : deviceFullName(getDeviceFullName()), specialSetup(new CPUSpec auto& ov_version = ov::get_openvino_version(); m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber); m_msg_manager = ov::threading::message_manager(); + m_remote_context = std::make_shared(get_device_name()); } Plugin::~Plugin() { @@ -144,24 +145,35 @@ Plugin::~Plugin() { executor_manager()->clear("CPUCallbackExecutor"); } -static bool streamsSet(const ov::AnyMap& config) { - return config.count(ov::num_streams.name()); +namespace { + +ov::RTMap get_rt_info(const ov::Model& model) { + ov::RTMap rt_info; + if (model.has_rt_info("runtime_options")) + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); + } + return rt_info; } +} // namespace + void Plugin::get_performance_streams(Config& config, const std::shared_ptr& model) const { - int streams_set = config.streams; + int streams_set = config.get_num_streams(); int streams; - if (config.streamsChanged) { + if (config.is_set_by_user(ov::num_streams)) { streams = streams_set; - } else if (config.hintPerfMode == 
ov::hint::PerformanceMode::LATENCY) { + } else if (config.get_performance_mode() == ov::hint::PerformanceMode::LATENCY) { streams = 1; - } else if (config.hintPerfMode == ov::hint::PerformanceMode::THROUGHPUT) { + } else if (config.get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) { streams = 0; } else { streams = streams_set == 1 ? 0 : streams_set; } - if (!((0 == streams_set) && config.streamsChanged)) { + if (!((0 == streams_set) && config.is_set_by_user(ov::num_streams))) { get_num_streams(streams, model, config); } else { config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor", streams}; @@ -169,6 +181,11 @@ void Plugin::get_performance_streams(Config& config, const std::shared_ptr& model, bool imported) const { + + conf.streamExecutorConfig.set_property(ov::num_streams.name(), conf.get_property(ov::num_streams.name()).as()); + conf.streamExecutorConfig.set_property(ov::inference_num_threads.name(), conf.get_property(ov::inference_num_threads.name()).as()); + // conf.streamExecutorConfig.set_property(ov::threads_per_stream.name(), conf.get_property(ov::threads_per_stream.name())); + const auto model_prefer_name = std::string("MODEL_PREFER_THREADS"); if (imported && model->has_rt_info("intel_cpu_hints_config")) { // load model_prefer_threads from cache @@ -207,7 +224,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, - const ov::AnyMap& orig_config) const { + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Plugin::compile_model"); CREATE_DEBUG_TIMER(debugLoadTimer); @@ -239,27 +256,26 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } } - auto config = orig_config; + // auto config = orig_config; const std::shared_ptr cloned_model = model->clone(); - Config::ModelType modelType = getModelType(model); DEBUG_LOG(PrintableModel(*cloned_model, "org_")); // update the props after the perf mode translated to configs // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? 
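+    // Assumed precedence in the new PluginConfig flow (inferred from is_set_by_user() and
+    // the rt_info handling around finalize()): properties passed to compile_model() or
+    // core.set_property() take priority over the model's "runtime_options" rt_info, which
+    // in turn overrides the defaults declared in options.inl; rt_info is only consulted
+    // for options the user did not set explicitly.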
- Config conf = engConfig; - conf.applyRtInfo(cloned_model); - conf.readProperties(config, modelType); + // Config conf = engConfig; + // conf.applyRtInfo(cloned_model); + // conf.readProperties(config, modelType); - Transformations transformations(cloned_model, conf); + Config config = m_plugin_config; + config.set_user_property(properties, OptionVisibility::RELEASE); + config.modelType = getModelType(model); - transformations.UpToLpt(); + Transformations transformations(cloned_model, config); - calculate_streams(conf, cloned_model); + transformations.UpToLpt(); - if (!conf.cacheEncrypt || !conf.cacheDecrypt) { - conf.cacheEncrypt = codec_xor_str; - conf.cacheDecrypt = codec_xor_str; - } + calculate_streams(config, cloned_model); + config.finalize(get_default_context(), get_rt_info(*model)); transformations.PostLpt(); transformations.Snippets(); @@ -288,85 +304,90 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } // SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE - static Xbyak::util::Cpu cpu; - if (cpu.has(Xbyak::util::Cpu::tSSE)) { - if (conf.denormalsOptMode == Config::DenormalsOptMode::DO_On) { + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) { + auto denormals_optimization = config.get_denormals_optimization(); + if (denormals_optimization && *denormals_optimization == true) { flush_to_zero(true); - conf.DAZOn = denormals_as_zero(true); - } else if (conf.denormalsOptMode == Config::DenormalsOptMode::DO_Off) { + config.DAZOn = denormals_as_zero(true); + } else if (denormals_optimization && *denormals_optimization == false) { flush_to_zero(false); denormals_as_zero(false); } } - return std::make_shared(cloned_model, shared_from_this(), conf, false); + + return std::make_shared(cloned_model, shared_from_this(), config, false); } void Plugin::set_property(const ov::AnyMap& config) { - // @todo after Legacy configuration is dropped, use some wrapper class to keep both the property and - // "ifSetExplicitly" flag - streamsExplicitlySetForEngine = streamsSet(config); - - engConfig.readProperties(config); + m_plugin_config.set_user_property(config, OptionVisibility::RELEASE); } ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { - if (name == ov::optimal_number_of_infer_requests) { - const auto streams = engConfig.streamExecutorConfig.get_streams(); - return decltype(ov::optimal_number_of_infer_requests)::value_type( - streams); // ov::optimal_number_of_infer_requests has no negative values - } else if (name == ov::num_streams) { - const auto streams = engConfig.streamExecutorConfig.get_streams(); - return decltype(ov::num_streams)::value_type( - streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (name == ov::affinity) { - const auto affinity = engConfig.threadBindingType; - switch (affinity) { - case IStreamsExecutor::ThreadBindingType::NONE: - return ov::Affinity::NONE; - case IStreamsExecutor::ThreadBindingType::CORES: - return ov::Affinity::CORE; - case IStreamsExecutor::ThreadBindingType::NUMA: - return ov::Affinity::NUMA; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return ov::Affinity::HYBRID_AWARE; - } - return ov::Affinity::NONE; - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (name == ov::device::id.name()) { - return decltype(ov::device::id)::value_type{engConfig.device_id}; - } else if (name == ov::inference_num_threads) { - const auto threads = engConfig.streamExecutorConfig.get_threads(); 
- return decltype(ov::inference_num_threads)::value_type(threads); - } else if (name == ov::enable_profiling.name()) { - const bool perfCount = engConfig.collectPerfCounters; - return decltype(ov::enable_profiling)::value_type(perfCount); - } else if (name == ov::hint::inference_precision) { - return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision); - } else if (name == ov::hint::performance_mode) { - return engConfig.hintPerfMode; - } else if (name == ov::hint::enable_cpu_pinning) { - const bool pin_value = engConfig.enableCpuPinning; - return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value); - } else if (name == ov::hint::scheduling_core_type) { - const auto core_type = engConfig.schedulingCoreType; - return core_type; - } else if (name == ov::hint::model_distribution_policy) { - const auto& distribution_policy = engConfig.modelDistributionPolicy; - return distribution_policy; - } else if (name == ov::hint::enable_hyper_threading) { - const bool ht_value = engConfig.enableHyperThreading; - return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); - } else if (name == ov::hint::num_requests) { - return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); - } else if (name == ov::hint::execution_mode) { - return engConfig.executionMode; - } else if (name == ov::internal::compiled_model_runtime_properties.name()) { + // if (name == ov::optimal_number_of_infer_requests) { + // const auto streams = engConfig.streamExecutorConfig.get_streams(); + // return decltype(ov::optimal_number_of_infer_requests)::value_type( + // streams); // ov::optimal_number_of_infer_requests has no negative values + // } else if (name == ov::num_streams) { + // const auto streams = engConfig.streamExecutorConfig.get_streams(); + // return decltype(ov::num_streams)::value_type( + // streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) + // OPENVINO_SUPPRESS_DEPRECATED_START + // } else if (name == ov::affinity) { + // const auto affinity = engConfig.threadBindingType; + // switch (affinity) { + // case IStreamsExecutor::ThreadBindingType::NONE: + // return ov::Affinity::NONE; + // case IStreamsExecutor::ThreadBindingType::CORES: + // return ov::Affinity::CORE; + // case IStreamsExecutor::ThreadBindingType::NUMA: + // return ov::Affinity::NUMA; + // case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: + // return ov::Affinity::HYBRID_AWARE; + // } + // return ov::Affinity::NONE; + // OPENVINO_SUPPRESS_DEPRECATED_END + // } else if (name == ov::device::id.name()) { + // return decltype(ov::device::id)::value_type{engConfig.device_id}; + // } else if (name == ov::inference_num_threads) { + // const auto threads = engConfig.streamExecutorConfig.get_threads(); + // return decltype(ov::inference_num_threads)::value_type(threads); + // } else if (name == ov::enable_profiling.name()) { + // const bool perfCount = engConfig.collectPerfCounters; + // return decltype(ov::enable_profiling)::value_type(perfCount); + // } else if (name == ov::hint::inference_precision) { + // return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision); + // } else if (name == ov::hint::performance_mode) { + // return engConfig.hintPerfMode; + // } else if (name == ov::hint::enable_cpu_pinning) { + // const bool pin_value = engConfig.enableCpuPinning; + // return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value); + // } else if (name == ov::hint::scheduling_core_type) { + // const auto core_type = 
engConfig.schedulingCoreType; + // return core_type; + // } else if (name == ov::hint::model_distribution_policy) { + // const auto& distribution_policy = engConfig.modelDistributionPolicy; + // return distribution_policy; + // } else if (name == ov::hint::enable_hyper_threading) { + // const bool ht_value = engConfig.enableHyperThreading; + // return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); + // } else if (name == ov::hint::num_requests) { + // return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); + // } else if (name == ov::hint::execution_mode) { + // return engConfig.executionMode; + // } else if (name == ov::log::level) { + // return engConfig.logLevel; + // } else if (name == ov::internal::exclusive_async_requests.name()) { + // return engConfig.exclusiveAsyncRequests; + // } else if (name == ov::hint::dynamic_quantization_group_size) { + // return decltype(ov::hint::dynamic_quantization_group_size)::value_type( + // engConfig.fcDynamicQuantizationGroupSize); + // } else if (name == ov::hint::kv_cache_precision) { + // return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); + + if (name == ov::internal::compiled_model_runtime_properties.name()) { auto model_runtime_properties = ov::Any(m_compiled_model_runtime_properties); return decltype(ov::internal::compiled_model_runtime_properties)::value_type( std::move(model_runtime_properties.as())); - } else if (name == ov::log::level) { - return engConfig.logLevel; } else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) { ov::Any res = true; auto it = options.find(ov::internal::compiled_model_runtime_properties.name()); @@ -383,13 +404,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } } return res; - } else if (name == ov::internal::exclusive_async_requests.name()) { - return engConfig.exclusiveAsyncRequests; - } else if (name == ov::hint::dynamic_quantization_group_size) { - return decltype(ov::hint::dynamic_quantization_group_size)::value_type( - engConfig.fcDynamicQuantizationGroupSize); - } else if (name == ov::hint::kv_cache_precision) { - return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); + } return get_ro_property(name, options); } @@ -485,12 +500,6 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (name == ov::internal::caching_properties) { std::vector cachingProperties = {ov::device::full_name}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); - } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(engConfig.denormalsOptMode == - Config::DenormalsOptMode::DO_On); - } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( - engConfig.fcSparseWeiDecompressionRate); } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{get_device_name()}; } else if (name == ov::device::type) { @@ -511,27 +520,27 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio #endif } - OPENVINO_THROW("Cannot get unsupported property: ", name); + return m_plugin_config.get_property(name, OptionVisibility::RELEASE); + // OPENVINO_THROW("Cannot get unsupported property: ", name); } -ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const 
ov::AnyMap& config) const { +ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const { WeightsSharing::Ptr fake_w_cache; if (model == nullptr) { OPENVINO_THROW("Only ngraph-based models are supported!"); } - Config conf = engConfig; - Config::ModelType modelType = getModelType(model); - conf.applyRtInfo(model); - conf.readProperties(config, modelType); - - auto context = std::make_shared(conf, fake_w_cache, false); + Config config = m_plugin_config; + config.set_user_property(properties, OptionVisibility::RELEASE); + config.modelType = getModelType(model); + config.finalize(get_default_context(), get_rt_info(*model)); + auto context = std::make_shared(config, fake_w_cache, false); auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { - Transformations transformation(model, conf); + Transformations transformation(model, config); transformation.UpToLpt(); transformation.PostLpt(); transformation.Snippets(); @@ -555,22 +564,30 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { +std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& properties) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); CacheDecrypt decrypt{codec_xor}; bool decript_from_string = false; - if (config.count(ov::cache_encryption_callbacks.name())) { - auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); + if (properties.count(ov::cache_encryption_callbacks.name())) { + auto encryption_callbacks = properties.at(ov::cache_encryption_callbacks.name()).as(); decrypt.m_decrypt_str = encryption_callbacks.decrypt; decript_from_string = true; } - auto _config = config; + auto _properties = properties; std::shared_ptr model_buffer; - if (_config.count(ov::internal::cached_model_buffer.name())) { - model_buffer = _config.at(ov::internal::cached_model_buffer.name()).as>(); - _config.erase(ov::internal::cached_model_buffer.name()); + if (_properties.count(ov::internal::cached_model_buffer.name())) { + model_buffer = _properties.at(ov::internal::cached_model_buffer.name()).as>(); + _properties.erase(ov::internal::cached_model_buffer.name()); + } + + // check the ov::loaded_from_cache property and erase it so it is not passed on to the plugin config. + const auto& it = _properties.find(ov::loaded_from_cache.name()); + bool loaded_from_cache = false; + if (it != _properties.end()) { + loaded_from_cache = it->second.as(); + _properties.erase(it); + } ModelDeserializer deserializer( @@ -585,23 +602,18 @@ std::shared_ptr Plugin::import_model(std::istream& model_str std::shared_ptr model; deserializer >> model; - Config conf = engConfig; - Config::ModelType modelType = getModelType(model); - conf.applyRtInfo(model); - // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. 
- const auto& it = _config.find(ov::loaded_from_cache.name()); - bool loaded_from_cache = false; - if (it != _config.end()) { - loaded_from_cache = it->second.as(); - _config.erase(it); - } - conf.readProperties(_config, modelType); + Config config = m_plugin_config; + config.set_user_property(_properties, OptionVisibility::RELEASE); // use the sanitized copy: internal keys were erased above + config.modelType = getModelType(model); // import config props from caching model - calculate_streams(conf, model, true); - auto compiled_model = std::make_shared(model, shared_from_this(), conf, loaded_from_cache); + calculate_streams(config, model, true); + config.finalize(get_default_context(), get_rt_info(*model)); + + auto compiled_model = std::make_shared(model, shared_from_this(), config, loaded_from_cache); return compiled_model; } + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index a67602ec4ece12..a8c596b4acdaad 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -6,6 +6,7 @@ #include "compiled_model.h" #include "openvino/runtime/threading/cpu_message.hpp" +#include "remote_context.hpp" namespace ov { namespace intel_cpu { @@ -43,19 +44,22 @@ class Plugin : public ov::IPlugin { std::shared_ptr m_msg_manager; + std::shared_ptr get_default_context() const { + return m_remote_context; + } + private: ov::Any get_ro_property(const std::string& name, const ov::AnyMap& options) const; void get_performance_streams(Config& config, const std::shared_ptr& model) const; void calculate_streams(Config& conf, const std::shared_ptr& model, bool imported = false) const; - Config engConfig; - /* Explicily configured streams have higher priority than performance hints. - So track if streams is set explicitly (not auto-configured) */ - bool streamsExplicitlySetForEngine = false; + Config m_plugin_config; const std::string deviceFullName; ov::AnyMap m_compiled_model_runtime_properties; std::shared_ptr specialSetup; + + std::shared_ptr m_remote_context; }; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/remote_context.cpp b/src/plugins/intel_cpu/src/remote_context.cpp new file mode 100644 index 00000000000000..4c457c8d634e3d --- /dev/null +++ b/src/plugins/intel_cpu/src/remote_context.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remote_context.hpp" + +namespace ov { +namespace intel_cpu { + +RemoteContextImpl::RemoteContextImpl(const std::string& device_name) : m_device_name(device_name) {} + +const ov::AnyMap& RemoteContextImpl::get_property() const { + // Return a persistent empty map: returning `{}` here would bind the returned reference to a temporary. + static const ov::AnyMap empty; + return empty; +} + +ov::SoPtr RemoteContextImpl::create_tensor(const ov::element::Type& type, + const ov::Shape& shape, + const ov::AnyMap& params) { + // TODO: should we reject non-empty `params` here? 
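+    // The CPU plugin allocates in host memory only, so the low-level `params` are
+    // currently ignored and a plain host tensor is returned.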
+ return create_host_tensor(type, shape); +} + +const std::string& RemoteContextImpl::get_device_name() const { + return m_device_name; +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/remote_context.hpp b/src/plugins/intel_cpu/src/remote_context.hpp new file mode 100644 index 00000000000000..6cc58e34c96473 --- /dev/null +++ b/src/plugins/intel_cpu/src/remote_context.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/iremote_context.hpp" + +namespace ov { +namespace intel_cpu { + +class RemoteContextImpl : public ov::IRemoteContext { +public: + RemoteContextImpl(const std::string& device_name); + + /** + * @brief Returns name of a device on which underlying object is allocated. + * @return A device name string in fully specified format `<device_name>[.<device_id>[.<tile_id>]]` (e.g. GPU.0.1). + */ + const std::string& get_device_name() const override; + + /** + * @brief Returns a map of device-specific parameters + * @return A map of name/Any elements. + */ + const ov::AnyMap& get_property() const override; + + /** + * @brief Allocates memory tensor in device memory or wraps user-supplied memory handle + * using the specified tensor description and low-level device-specific parameters. + * Returns a pointer to the object that implements the RemoteTensor interface. + * @param type Defines the element type of the tensor. + * @param shape Defines the shape of the tensor. + * @param params Map of the low-level tensor object parameters. + * @return Pointer to a plugin object that implements the RemoteTensor interface. + */ + ov::SoPtr create_tensor(const ov::element::Type& type, + const ov::Shape& shape, + const ov::AnyMap& params) override; + +private: + std::string m_device_name; +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp index 9793c63de821ec..4159a6a2dcaa0b 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp @@ -61,7 +61,7 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr& model, const C IC, OC, G, - config.inferencePrecision); + config.get_inference_precision()); }); CPU_REGISTER_PASS_X64(manager, pass::ConvertFCToFCQuantizedLegacy); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 469abbd99eb149..0872472ac47dfa 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -305,9 +305,9 @@ void Transformations::UpToLpt() { levels::int8, levels::int8_narrow_range}; - const bool useLpt = config.lpTransformsMode == Config::LPTransformsMode::On && + const bool useLpt = config.get_lp_transforms_mode() && LowPrecision::isFunctionQuantized(model, supported_fq_levels) && - CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt); + CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config, Lpt); const auto defaultPrecisions = useLpt ? 
precision_set::get_int8_support() : std::vector{}; @@ -397,7 +397,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis if (!hasHardwareSupport(ov::element::bf16)) map.insert({ov::element::bf16, ov::element::f32}); // TODO: Remove 'hasHardwareSupport' when all nodes are able to handle f16 properly. - if (!one_of(config.inferencePrecision, element::f16, element::undefined) || !hasHardwareSupport(element::f16)) { + if (!one_of(config.get_inference_precision(), element::f16, element::undefined) || !hasHardwareSupport(element::f16)) { map.insert({ov::element::f16, ov::element::f32}); } return map; @@ -407,7 +407,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // It cannot be static data, because it may be difference for different inferencePrecision const auto precisions = get_convert_precisions(); - if (config.inferencePrecision == ov::element::f16) { + if (config.get_inference_precision() == ov::element::f16) { precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) type_to_fuse_map fuse_map = {{ov::opset1::FakeQuantize::get_type_info_static(), fuse_type_to_fq}}; @@ -609,12 +609,12 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // snippets pipeline as well, where MVN is decomposed to simple ops, these simple ops will not // tokenized into subgraph again. // CVS-134277 to fully enable GN as snippets to disable this GroupNormalizationDecomposition entirly. - if (node->is_dynamic() || !one_of(config.inferencePrecision, element::f32, element::undefined) || - config.snippetsMode == Config::SnippetsMode::Disable) + if (node->is_dynamic() || !one_of(config.get_inference_precision(), element::f32, element::undefined) || + config.get_snippets_mode() == SnippetsMode::DISABLE) return false; - if (config.snippetsMode != Config::SnippetsMode::IgnoreCallback) { + if (config.get_snippets_mode() != SnippetsMode::IGNORE_CALLBACK) { const auto group_norm = ov::as_type_ptr(node); - if (!group_norm || !implication(config.inferencePrecision == element::undefined, + if (!group_norm || !implication(config.get_inference_precision() == element::undefined, group_norm->get_element_type() == element::f32)) return false; const auto num_groups = static_cast(group_norm->get_num_groups()); @@ -902,12 +902,12 @@ void Transformations::PostLpt() { #if defined(OPENVINO_ARCH_X86_64) // MLP & QKV fusion optimizations is focused on throughput, only enabled on AMX-bf16 & LLM serving use cases. auto can_use_amx_bf16_int8 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) && - (config.inferencePrecision == element::bf16); + (config.get_inference_precision() == element::bf16); auto can_use_amx_fp16 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16) && - (config.inferencePrecision == element::f16); + (config.get_inference_precision() == element::f16); if (can_use_amx_bf16_int8 || can_use_amx_fp16) { - const auto fcDynamicQuantizationGroupSize = config.fcDynamicQuantizationGroupSize; + const auto fcDynamicQuantizationGroupSize = config.get_dynamic_quantization_group_size(); CPU_REGISTER_PASS_X64(postLPTPassManager, MLPFusion); CPU_SET_CALLBACK_X64( postLPTPassManager, @@ -961,7 +961,7 @@ void Transformations::PostLpt() { ov::intel_cpu::DecomposeRMSNorm); // markup Rope Input when BF16/F16 inference. 
- if (one_of(config.inferencePrecision, ov::element::bf16, ov::element::f16)) { + if (one_of(config.get_inference_precision(), ov::element::bf16, ov::element::f16)) { CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::MarkRopeInputsToKeepInMixedPrecision); CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::MarkFloatingPointRange); } @@ -984,11 +984,11 @@ void Transformations::MainSnippets(void) { return false; }; - if (config.snippetsMode == Config::SnippetsMode::Disable || !is_supported_isa()) + if (config.get_snippets_mode() == SnippetsMode::DISABLE || !is_supported_isa()) return; // TODO [123659] Implement common logic to split optimization and limitation conditions - const auto ignoreCallback = config.snippetsMode == Config::SnippetsMode::IgnoreCallback; + const auto ignoreCallback = config.get_snippets_mode() == SnippetsMode::IGNORE_CALLBACK; // [111813]: At the moment Snippets supports Transpose on output of MHA pattern only if it is an one node between // MatMul and Result. However there may be Convert [f32->bf16] before Result since: @@ -996,7 +996,7 @@ void Transformations::MainSnippets(void) { // - CPU Node Subgraph requires bf16 on output when inference precision is bf16. // To avoid situations when Transpose is not alone node between MatMul and Result, // Plugin disables Transpose tokenization on output - bool mha_token_enable_transpose_on_output = one_of(config.inferencePrecision, element::f32, element::undefined); + bool mha_token_enable_transpose_on_output = one_of(config.get_inference_precision(), element::f32, element::undefined); size_t concurrency = config.streamExecutorConfig.get_threads_per_stream(); if (concurrency == 0) concurrency = parallel_get_max_threads(); @@ -1004,7 +1004,7 @@ void Transformations::MainSnippets(void) { // Runtime caching should be enabled in case of dynamic Subgraphs in CPU Plugin: to reduce overheads of // ShapeInference and CodeGeneration If runtime cache capacity is zero, it means that rtCache won't be used and we // shouldn't tokenize dynamic Subgraphs - it will lead to performance degradations - bool is_dynamic_mha_token_enabled = config.rtCacheCapacity != 0; + bool is_dynamic_mha_token_enabled = config.get_cpu_runtime_cache_capacity() != 0; #if defined(OPENVINO_ARCH_ARM64) // ARM has 32 gprs. After excluding 2 registers for work amounts, 1 register for runtime parameters, 1 platform // register, 3 registers for temporary use, and 2 stack related registers, it has 23 remaining registers. 
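For reference, a minimal standalone sketch of the precision gating these call sites converge on; `one_of` and the getter below are simplified stand-ins, not the plugin's real definitions:

    enum class Precision { f32, f16, bf16, undefined };

    // Variadic membership test in the spirit of ov::intel_cpu::one_of.
    template <typename T, typename... Others>
    bool one_of(T val, Others... candidates) {
        return (... || (val == candidates));  // C++17 fold over ||
    }

    struct ConfigSketch {
        Precision m_inference_precision = Precision::undefined;
        const Precision& get_inference_precision() const { return m_inference_precision; }
    };

    // Mirrors the checks above: BF16/F16-specific handling runs only when a low
    // floating-point inference precision was requested.
    bool needs_low_precision_handling(const ConfigSketch& config) {
        return one_of(config.get_inference_precision(), Precision::bf16, Precision::f16);
    }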
@@ -1036,7 +1036,7 @@ void Transformations::MainSnippets(void) { #if defined(OPENVINO_ARCH_ARM64) CPU_REGISTER_PASS_ARM(snippetsManager, SnippetsMarkSkipped); #else - CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, config.inferencePrecision == ov::element::bf16); + CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, config.get_inference_precision() == ov::element::bf16); #endif CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeFCSnippets); } @@ -1048,11 +1048,11 @@ void Transformations::MainSnippets(void) { false; #else (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) && - one_of(config.inferencePrecision, ov::element::f32, element::undefined)) || + one_of(config.get_inference_precision(), ov::element::f32, element::undefined)) || (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) && - one_of(config.inferencePrecision, ov::element::bf16, ov::element::f32, element::undefined)) || + one_of(config.get_inference_precision(), ov::element::bf16, ov::element::f32, element::undefined)) || (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16) && - one_of(config.inferencePrecision, ov::element::f16)); + one_of(config.get_inference_precision(), ov::element::f16)); #endif if (!isMHASupported) { CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeMHASnippets); @@ -1067,13 +1067,13 @@ void Transformations::MainSnippets(void) { const auto in_type0 = matmul->get_input_element_type(0); const auto in_type1 = matmul->get_input_element_type(1); const auto is_fp32 = (in_type0 == ov::element::f32 && in_type1 == ov::element::f32 && - one_of(config.inferencePrecision, element::f32, element::undefined)); + one_of(config.get_inference_precision(), element::f32, element::undefined)); const auto is_fp16 = (in_type0 == ov::element::f16 || in_type1 == ov::element::f16) || - (in_type0 == element::f32 && in_type1 == ov::element::f32 && config.inferencePrecision == ov::element::f16); + (in_type0 == element::f32 && in_type1 == ov::element::f32 && config.get_inference_precision() == ov::element::f16); const auto is_bf16 = (in_type0 == ov::element::bf16 && in_type1 == ov::element::bf16) || ((in_type0 == element::f32 && in_type1 == ov::element::f32 && - config.inferencePrecision == ov::element::bf16)); + config.get_inference_precision() == ov::element::bf16)); const auto is_int8 = in_type0 == ov::element::i8; if (is_fp32) return true; @@ -1240,7 +1240,7 @@ void Transformations::MainSnippets(void) { snippets::pass::TokenizeSnippets); auto mm_supports_transpose_b = [this, ignoreCallback](const std::shared_ptr& n) { - MAYBE_UNUSED(config.inferencePrecision); + MAYBE_UNUSED(config.get_inference_precision()); if (!ignoreCallback) return false; // Note: BrgemmTPP doesn't support transposed KN natively @@ -1260,7 +1260,7 @@ void Transformations::MainSnippets(void) { } ov::element::TypeVector precisions; auto push_precision = [&](const ov::element::Type& precision) { - if (config.inferencePrecision == ov::element::bf16 && precision == ov::element::f32) + if (config.get_inference_precision() == ov::element::bf16 && precision == ov::element::f32) precisions.push_back(ov::element::bf16); else precisions.push_back(precision); @@ -1300,7 +1300,7 @@ void Transformations::PostSnippets(void) { } void Transformations::Snippets(void) { - const bool useSnippets = config.snippetsMode != Config::SnippetsMode::Disable && + const bool useSnippets = config.get_snippets_mode() != SnippetsMode::DISABLE && 
CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Snippets); if (!useSnippets) return; diff --git a/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp b/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp index 356ca2f1141f82..610dcbff1de310 100644 --- a/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp +++ b/src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp @@ -98,25 +98,25 @@ class TransformationDumper { } // namespace ov // 'EXPAND' wrapper is necessary to ensure __VA_ARGS__ behaves the same on all the platforms -# define CPU_DEBUG_CAP_EXPAND(x) x -# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \ - _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] -# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) \ - CPU_DEBUG_CAP_EXPAND(!CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__)) -# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \ - OPENVINO_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \ - auto dumperPtr = \ - _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] \ - ? std::unique_ptr( \ - new TransformationDumper(_this->config.debugCaps, \ - DebugCapsConfig::TransformationFilter::Type::_type, \ - _this->model)) \ - : nullptr -# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \ - if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \ - return; \ - CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) -#else +// # define CPU_DEBUG_CAP_EXPAND(x) x +// # define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \ +// _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] +// # define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) \ +// CPU_DEBUG_CAP_EXPAND(!CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__)) +// # define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \ +// OPENVINO_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \ +// auto dumperPtr = \ +// _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] \ +// ? std::unique_ptr( \ +// new TransformationDumper(_this->config.debugCaps, \ +// DebugCapsConfig::TransformationFilter::Type::_type, \ +// _this->model)) \ +// : nullptr +// # define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \ +// if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \ +// return; \ +// CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) +// #else # define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) false # define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) 
true # define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp index 6e8a1a798256ed..f1175e09322f16 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp +++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp @@ -24,20 +24,16 @@ static void formatNodeName(std::string& name) { std::replace(name.begin(), name.end(), ':', '-'); } -static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, const std::string& portsKind) { - const auto& dumpFilters = config.blobDumpFilters; - - if (dumpFilters.empty()) - return false; - - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_PORTS)) { // filter by ports configured - if (dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS) != "ALL" && - portsKind != dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS)) +static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) { + const auto& filter_by_ports = config.get_blob_dump_node_ports(); + if (!filter_by_ports.empty()) { // filter by ports configured + if (filter_by_ports != "ALL" && portsKind != filter_by_ports) return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_EXEC_ID)) { // filter by exec id configured - std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_EXEC_ID)); + const auto& filter_by_exec_id = config.get_blob_dump_node_exec_id(); + if (!filter_by_exec_id.empty()) { // filter by exec id configured + std::stringstream ss(filter_by_exec_id); int id; bool matched = false; @@ -52,8 +48,9 @@ static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, c return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_TYPE)) { // filter by type configured - std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_TYPE)); + const auto& filter_by_type = config.get_blob_dump_node_type(); + if (!filter_by_type.empty()) { // filter by type configured + std::stringstream ss(filter_by_type); std::string type; bool matched = false; @@ -68,24 +65,25 @@ static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, c return false; } - if (dumpFilters.count(DebugCapsConfig::FILTER::BY_NAME)) { // filter by name configured - if (dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME) != + const auto& filter_by_name = config.get_blob_dump_node_name(); + if (!filter_by_name.empty()) { // filter by name configured + if (filter_by_name != "*" && // to have 'single char' option for matching all the names !std::regex_match(node->getName(), - std::regex(dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME)))) // name does not match + std::regex(filter_by_name))) // name does not match return false; } return true; } -static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsConfig& config) { - switch (config.blobDumpFormat) { - case DebugCapsConfig::FORMAT::BIN: { +static void dump(const BlobDumper& bd, const std::string& file, const Config& config) { + switch (config.get_blob_dump_format()) { + case BlobDumpFormat::BIN: { bd.dump(file); break; } - case DebugCapsConfig::FORMAT::TEXT: { + case BlobDumpFormat::TEXT: { bd.dumpAsTxt(file); break; } @@ -94,7 +92,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsC } } -static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config) { +static void dumpInternalBlobs(const NodePtr& node, const Config& config) { std::string nodeName = node->getName(); formatNodeName(nodeName); @@ -103,7 
+101,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config for (size_t i = 0; i < internalBlobs.size(); i++) { const auto& blb = internalBlobs[i]; std::string file_name = NameFromType(node->getType()) + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb"; - auto dump_file = config.blobDumpDir + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name; if (blb->getDesc().getPrecision() == ov::element::u1) continue; @@ -113,7 +111,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config } } -void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) { +void dumpInputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "IN")) return; @@ -133,7 +131,7 @@ void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int coun if (file_name.size() > 240) file_name = file_name.substr(file_name.size() - 240); - auto dump_file = config.blobDumpDir + "/#" + exec_order + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; auto& desc = prEdge->getMemory().getDesc(); @@ -147,7 +145,7 @@ void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int coun dumpInternalBlobs(node, config); } -void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) { +void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "OUT")) return; @@ -166,7 +164,7 @@ void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int cou if (file_name.size() > 240) file_name = file_name.substr(file_name.size() - 240); - auto dump_file = config.blobDumpDir + "/#" + exec_order + "_" + file_name; + auto dump_file = config.get_blob_dump_dir() + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; auto& desc = childEdge->getMemory().getDesc(); diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.h b/src/plugins/intel_cpu/src/utils/node_dumper.h index bed94930fa02c4..3d69486154ed23 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.h +++ b/src/plugins/intel_cpu/src/utils/node_dumper.h @@ -11,16 +11,16 @@ namespace ov { namespace intel_cpu { -void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count = -1); -void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count = -1); +void dumpInputBlobs(const NodePtr& node, const Config& config, int count = -1); +void dumpOutputBlobs(const NodePtr& node, const Config& config, int count = -1); class DumpHelper { const NodePtr& node; const int count; - const DebugCapsConfig& config; + const Config& config; public: - explicit DumpHelper(const NodePtr& _node, const DebugCapsConfig& _config, int _count = -1) + explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1) : node(_node), count(_count), config(_config) { diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 40a4fc4a1739c4..9329e415f49f6a 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -27,7 +27,7 @@ endif() target_link_libraries(cpuUtils PRIVATE ${CPU_UTILS_LINK_LIBRARIES}) target_include_directories(cpuUtils 
PUBLIC ${CPU_UTILS_INCLUDE_PATHS}) -set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $/src) +set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $/src $) set(DEPENDENCIES openvino_intel_cpu_plugin openvino_template_extension) set(LINK_LIBRARIES funcSharedTests cpuUtils openvino::snippets ov_snippets_models) diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 057869a864d87b..04080efdd990f3 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -98,6 +98,7 @@ endfunction() if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt) + create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/behavior/ov_executable_network ov_cpu_func_behavior) endif() # examples of targets: diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index 73086b78a0de95..5d30ef57700dd2 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -54,19 +54,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable ASSERT_EQ(supportedProperties, expectedSupportedProperties); } -TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkGetROPropertiesDoesNotThrow) { - ov::Core ie; - std::vector properties; - - ov::CompiledModel compiledModel = ie.compile_model(model, deviceName); - - OV_ASSERT_NO_THROW(properties = compiledModel.get_property(ov::supported_properties)); - - for (const auto& property : properties) { - OV_ASSERT_NO_THROW((void)compiledModel.get_property(property)); - } -} - TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSetROPropertiesThrow) { ov::Core ie; std::vector properties; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 379d7b3b64a222..2d820de1ef17cf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -304,12 +304,11 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 49a45ec9ffa11a..28a20fa737da76 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -44,7 +44,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& 
properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 5cede62fd17e69..8a5fb44ba7e522 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -80,7 +80,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -138,8 +138,8 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } + bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -157,8 +157,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -173,7 +171,7 @@ class ProgramBuilder final { void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 5e059b17da0e97..0ca7f616f8790b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,172 +1,48 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov { namespace intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } - - return os; -} - -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; - -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } - -private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public 
BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class ExecutionConfig { -public: +struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(); ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } - void set_default(); - void set_property(const ov::AnyMap& properties); - void set_user_property(const ov::AnyMap& properties); - Any get_property(const std::string& name) const; - bool is_set_by_user(const std::string& name) const; - bool is_supported(const std::string& name) const; - void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); - - template ::type = true> - void register_property_impl() { } + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); - template - void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared>()); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } + void finalize(cldnn::engine& engine); + using ov::PluginConfig::finalize; - template - typename std::enable_if::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION - template - typename std::enable_if, ValidatorT>::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - void register_property(PropertyInitializer&&... properties) { - register_property_impl(properties...); - } - - template - util::EnableIfAllStringAny set_property(Properties&&... properties) { - set_property(ov::AnyMap{std::forward(properties)...}); - } - - template - util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } +protected: + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + const ov::PluginConfig::OptionsDesc& get_options_desc() const override; void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } - -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION }; } // namespace intel_gpu @@ -174,4 +50,4 @@ class ExecutionConfig { namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 199261772dcf2e..2981f6beb002e1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -36,6 +36,40 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -46,18 +80,43 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property 
allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; +static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"}; +static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl new file mode 100644 index 00000000000000..9d63b6b15e3368 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -0,0 +1,76 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description +OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_RELEASE_OPTION(ov::device, 
id, "0", "ID of the current device") +OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") +OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") +OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") +OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision. Supported values: { f16, f32, undefined }") +OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") +OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") +OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + 
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable low-precision transformations set")
+
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls whether weights tensors can be reordered during model compilation to a more friendly layout for the specific kernel")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "List of output primitive names")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from the model compilation process, which allows faster execution graph dumping")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for the preallocation feature when the ratio policy is used")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable oneDNN usage for a particular model/platform")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into a batch for more efficient OCL compilation")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls the capacity of the LRU implementations cache that is created for each program object for dynamic models")
+
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher the value, the more verbose the output. 0 - Disabled, 4 - Maximum verbosity")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for oneDNN post-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save CSV file with per-stage and per-primitive profiling data to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, in, out")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. Supported values: binary, text, text_raw")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save CSV file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set{}, "Space-separated list of iterations for which other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the inference until all host work is done and the plugin is ready to block the thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows asynchronous preparation of static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime in-place optimizations for operations like concat and crop")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into a single MatMul")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep GPU-friendly memory alignment for arbitrary tensor shapes")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers for which raw binary dumps should be loaded")
diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp
index 3af0300602d7bf..fb1ef48df4b82c 100644
--- a/src/plugins/intel_gpu/src/graph/broadcast.cpp
+++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp
@@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() {
     // Can_be_optimized nodes are allocating from memory_pool too. In this case,
     // we need release the legacy output memory from memory pool explicitly.
     if (static_cast(_outputs[0]) &&
-        _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) {
+        _node->get_program().get_config().get_enable_memory_pool()) {
         _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id());
     }
     _outputs[0] = input_memory_ptr();
diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp
index e3ff36ceae38a5..7f091b7b7a8a28 100644
--- a/src/plugins/intel_gpu/src/graph/crop.cpp
+++ b/src/plugins/intel_gpu/src/graph/crop.cpp
@@ -278,7 +278,7 @@ void crop_inst::update_output_memory() {
     // Can_be_optimized nodes are allocating from memory_pool too. In this case,
     // we need release the legacy output memory from memory pool explicitly.
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index a370e8ba260f8b..83d9dbb260e40e 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 2aee524ac2e3e1..40478cfe017b23 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 6a361563653092..2e58e49ad3f207 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 462809268db88a..3bb6118a4fa565 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index 2f2015c6f8a303..7e562582fdcc74 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 65acb0beb66ba0..6408bd1ab74fb7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,8 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || 
concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + const auto& config = concat_node.get_config(); + GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } @@ -522,8 +522,7 @@ bool crop_in_place_optimization::match(const program_node& node, return false; if (node.get_users().size() > 0) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) { + GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) { return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 05f907dcd81f0a..fb92cef3aca7f2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -55,6 +55,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -164,10 +167,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..78b494c52645de 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { // This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. 
Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index a4129800733875..2c361c6335069c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -145,6 +146,7 @@ propagate_constants::calculate(engine& engine, ExecutionConfig cf_config = config; cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index b29be318593348..26359025d556ae 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -704,7 +704,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index fcd6dab33754fd..107a943ada7724 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 110444c2c6255c..d77ac2098e16d8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 42d83a0265d290..c5628d70a0450d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1164,13 +1164,13 @@ void set_params(const kernel_impl_params& 
param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 5db452dcda26f0..c9c1eadeaa6fac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,20 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); -} - -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return !_config.get_cache_dir().empty(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { @@ -201,7 +193,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= _config.get_max_kernels_per_batch() || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -243,9 +235,8 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + dump_sources_dir = _config.get_dump_sources_path(); } // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines @@ -301,10 +292,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co bool dump_sources = batch.dump_custom_program; std::string 
dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; + dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -379,7 +369,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index b08b087c55854a..98f65feea7bd4f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -103,7 +103,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 9e0a3fa5cfb390..64b92a15d1f4ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp 
b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index 62129866927ea4..adc96db374b44e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 275748da311081..93dc37320336f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,12 +47,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -70,9 +69,8 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + _enable_profiling = config.get_enable_profiling(); + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } @@ -318,7 +316,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +341,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp 
index ad08c516e939d8..9fd3c7a0caaf30 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index bc944cdc5ac5c9..535ac540c1ffff 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -60,7 +60,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 8b8c4e6b0b6e97..4e37b7df923a56 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index d1bbaa8a34cb8f..bc7d0fdfc67502 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; } + const 
ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 5262e8c4621e72..7fab84b8a6c527 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,15 +435,10 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; - // cases when NOT to use winograd - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1) - return false; - auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || (input_layout.is_static() && input_layout.feature() % 64 != 0) // current algorithm is effective for ifm to be multiply of 64 diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 37152b0d9e3b4f..b965ad7e59a34e 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) { if (!_internal) { net_id = get_unique_net_id(); } @@ -237,9 +237,8 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { + dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); } } @@ -364,7 +363,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * _config.get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index ba1cbbdf7816dc..f3788e34362604 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void 
non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index 2e4c792729f306..00a0b8e2a2881c 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0737362405ff9c..0fabfa386454c5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -1073,8 +1073,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1568,8 +1567,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1678,8 +1676,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1972,8 +1969,7 @@ void 
primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2029,7 +2025,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2307,8 +2303,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } @@ -2577,8 +2572,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index bdffb9c4980722..fc1c75818b26f0 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "impls/registry/implementation_manager.hpp" +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" @@ -107,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? 
num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -116,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -161,7 +162,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -197,7 +198,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -210,8 +211,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = cldnn::make_unique(); } @@ -223,26 +224,21 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); _layout_optimizer = cldnn::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = cldnn::make_unique(impls_cache_capacity); + _impls_cache = cldnn::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -484,19 +480,11 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - 
_config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } } void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } @@ -526,9 +514,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -546,7 +531,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -556,26 +541,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); @@ -623,7 +595,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -631,7 +603,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? 
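For illustration: the mechanical change running through these hunks replaces string-keyed, Any-typed lookups such as config.get_property(ov::intel_gpu::optimize_data) with generated, strongly typed getters such as config.get_optimize_data(). A minimal, self-contained sketch of the difference follows; SketchConfig and its member are placeholders and do not mirror the real ExecutionConfig internals.

#include <cassert>

// A typed getter is a compile-time-checked field read; the old string/Any lookup
// could only fail at runtime (unknown key, or a bad Any::as<T> cast).
struct SketchConfig {
    bool m_optimize_data = false;
    const bool& get_optimize_data() const { return m_optimize_data; }
};

int main() {
    SketchConfig cfg;
    cfg.m_optimize_data = true;
    assert(cfg.get_optimize_data());  // a typo in the getter name would not compile
    return 0;
}
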
- auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -650,7 +622,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); } @@ -772,7 +744,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1382,8 +1354,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + if (!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1411,7 +1382,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1635,15 +1606,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1851,8 +1822,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 4a2f43b28d9360..8838cd361502cd 100644 --- 
a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + auto path = config.get_dump_graphs_path(); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 5161887b79e57a..d0efb5a3f9d690 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1821,8 +1821,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 93698432e73be0..bfdc287852ae05 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index e5e33f4ad87b14..bc1921127efd1f 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index ee8850fbd46220..2af34fe0245443 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index ba0cea2e32299e..0ea852614b291c 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 8d10f9ad2b4fd7..ee61478dc3c8af 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 9e0902e1f2ad4e..9a6b845a874d10 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 47248cd2a4d773..ae962ff7e1e369 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 0774c62add1643..6cb5e47603c297 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -66,27 +66,6 @@ static bool is_weight_dyn_quantizable(const fully_connected_params& params) { // DYNAMIC_QUANTIZE static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. Enable Dynamic-quantize." 
<< std::endl; - } else { - dynamic_quantization_group_size = 0; - } - } - } - const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; for (auto group_size : available_quantize_grp_size) { if (dynamic_quantization_group_size >= group_size) { diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 810353fe626c19..279035f27d776f 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -21,20 +21,20 @@ namespace intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning)) { + } else if (config.get_enable_cpu_pinning()) { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, true}); } else { return std::make_shared( - ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_num_streams()}); } } } // namespace @@ -53,7 +53,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -148,7 +148,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -170,8 +170,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
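The export_model comment above encodes a small gate: with ov::CacheMode::OPTIMIZE_SIZE the blob is exported only when weightless caching is possible, i.e. when a usable weights path exists. A self-contained sketch of that gate, where CacheMode and weights_path_ok are simplified stand-ins for the ov types and ov::util::validate_weights_path:

#include <string>

enum class CacheMode { OPTIMIZE_SPEED, OPTIMIZE_SIZE };

// Stand-in for the real path validation; accepts any non-empty path.
static bool weights_path_ok(const std::string& path) { return !path.empty(); }

// Export proceeds unless OPTIMIZE_SIZE was requested without a usable weights file.
static bool should_export(CacheMode mode, const std::string& weights_path) {
    return mode != CacheMode::OPTIMIZE_SIZE || weights_path_ok(weights_path);
}

int main() {
    return should_export(CacheMode::OPTIMIZE_SIZE, "") ? 1 : 0;  // gate rejects empty path
}
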
- ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -179,7 +179,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -280,8 +280,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index c3d74feffb5599..8f32857f4cf9da 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/pass/serialize.hpp" @@ -34,12 +36,40 @@ namespace ov { namespace intel_gpu { +namespace { + + +class OstreamAttributeVisitor : public ov::AttributeVisitor { + cldnn::BinaryOutputBuffer& os; + + template + void append_attribute(const std::string& name, const T& value) { + os << name; + os << value; + } +public: + OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + append_attribute(name, adapter.get()); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + append_attribute(name, adapter.get()); + } +}; + +} // namespace Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, get_engine(), config, false); + auto program_builder = std::make_shared(model, get_engine(), config); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); @@ -86,15 +116,7 @@ Graph::Graph(cldnn::BinaryInputBuffer 
&ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); - } + // ib >> m_config; auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -178,7 +200,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -191,12 +213,12 @@ void Graph::build(std::shared_ptr program) { exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } @@ -210,7 +232,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.get_enable_profiling()) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -521,11 +543,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); - } + OstreamAttributeVisitor visitor(ob); + m_config.visit_attributes(visitor); ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 4b7b3748d6e69d..03489d6513e1ee 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -22,16 +23,11 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << ", num inputs: " << op->get_input_size() << std::endl; auto config = p.get_config(); 
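The OstreamAttributeVisitor added in graph.cpp above, together with the m_config.visit_attributes(visitor) call in export_model, replaces hand-picked "ob << property" lines with a visitor walk over the whole config. A compilable sketch of the pattern, with simplified stand-ins for the buffer, visitor, and config types (not the cldnn/ov interfaces):

#include <iostream>
#include <sstream>
#include <string>

// Minimal stand-in for a binary output buffer: appends each value on its own line.
struct BufferSketch {
    std::ostringstream os;
    template <typename T>
    BufferSketch& operator<<(const T& value) {
        os << value << '\n';
        return *this;
    }
};

// Stand-in visitor: every visited attribute is appended as name, then value.
struct VisitorSketch {
    BufferSketch& buf;
    template <typename T>
    void on_attribute(const std::string& name, const T& value) {
        buf << name << value;
    }
};

struct ConfigSketch {
    bool optimize_data = true;
    bool allow_new_shape_infer = false;
    // The config enumerates its own options, so export code no longer lists them.
    void visit_attributes(VisitorSketch& v) {
        v.on_attribute("optimize_data", optimize_data);
        v.on_attribute("allow_new_shape_infer", allow_new_shape_infer);
    }
};

int main() {
    BufferSketch ob;
    VisitorSketch visitor{ob};
    ConfigSketch config;
    config.visit_attributes(visitor);
    std::cout << ob.os.str();
    return 0;
}
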
- { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); - if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); - } - } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index bb11308064f7e7..53c5896b21710b 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -300,11 +300,10 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); + } + return rt_info; +} + +bool requires_new_shape_infer(const std::shared_ptr& op) { + if (op->is_dynamic()) { + return true; + } + + // HACK: SearchSorted has specific shape requirements. + // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine, + // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) { + const auto body_function = std::static_pointer_cast(op)->get_function(); + if (body_function->is_dynamic()) + return true; + } + + if (ov::is_type(op) || ov::is_type(op)) { + return true; + } + // When an input node has a dynamic shape with 4 dimensions, this function returns false + // because op.is_dynamic(), which only checks input shapes, returns false. + // So, in the case of input data, we need to check the output shape. + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } + + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; + } + + return false; +} + +void set_model_properties(const ov::Model& model, ExecutionConfig& config) { + const auto& ops = model.get_ordered_ops(); + // In the case of an inner program, the allow_new_shape_infer flag is set from outside the program. + // So, do not check allow_new_shape_infer for inner program builds. + for (const auto& op : ops) { + if (requires_new_shape_infer(op)) { + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + break; + } + } + bool is_dynamic = false; + for (const auto& op : ops) { + if (op->is_dynamic()) { + is_dynamic = true; + break; + } + } + bool has_lstm = false; + for (const auto& op : ops) { + if (ov::is_type(op)) { + has_lstm = true; + break; + } + } + + // In the case of dynamic models, because most of the layers are mapped to shape-agnostic kernels, + // a smaller # of kernels is built compared to static models. + // So having a smaller batch size is even better for dynamic models as we can do more parallel builds.
+ if (is_dynamic) { + config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + } else { + config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + } + + config.set_property(ov::intel_gpu::optimize_data(true)); + + if (has_lstm) + config.set_property(ov::intel_gpu::use_onednn(true)); +} + +} // namespace + #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -97,14 +203,13 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p const ExecutionConfig& config, const std::shared_ptr& context) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } @@ -123,8 +228,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; @@ -163,22 +268,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. 
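set_model_properties above condenses the per-model heuristics that used to live in ProgramBuilder: one pass over the ordered ops decides shape-infer mode, kernel batch size, and the oneDNN hint. A condensed, self-contained sketch of that decision flow; OpSketch and PropsSketch are placeholders, not ov types:

#include <vector>

struct OpSketch { bool dynamic = false; bool is_lstm = false; size_t max_rank = 4; };
struct PropsSketch { bool new_shape_infer = false; int max_kernels_per_batch = 8; bool use_onednn = false; };

static PropsSketch scan_model(const std::vector<OpSketch>& ops) {
    PropsSketch props;
    bool is_dynamic = false;
    for (const auto& op : ops) {
        // Mirrors requires_new_shape_infer: dynamism or rank above 6 forces the new path.
        if (op.dynamic || op.max_rank > 6) props.new_shape_infer = true;
        if (op.dynamic) is_dynamic = true;
        if (op.is_lstm) props.use_onednn = true;  // LSTM sequences prefer oneDNN
    }
    // Dynamic models map mostly to shape-agnostic kernels, so fewer kernels are built
    // and a smaller batch size leaves room for more parallel compilation.
    props.max_kernels_per_batch = is_dynamic ? 4 : 8;
    return props;
}

int main() {
    std::vector<OpSketch> ops = {{true, false, 4}, {false, true, 3}};
    PropsSketch p = scan_model(ops);
    return (p.max_kernels_per_batch == 4 && p.use_onednn) ? 0 : 1;
}
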
- if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -188,12 +277,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options")); - config.apply_user_properties(context->get_engine().get_device_info()); - - set_cache_info(model, config); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + set_model_properties(*model, config); + config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); { @@ -211,10 +297,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - - set_cache_info(model, config); + set_model_properties(*model, config); + config.finalize(context_impl, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context_impl); return std::make_shared(transformed_model, shared_from_this(), context_impl, config); @@ -244,7 +328,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_user_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -279,14 +363,13 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options")); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + set_model_properties(*model, config); + config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -336,11 +419,11 @@ std::shared_ptr Plugin::import_model(std::istream& 
model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.set_user_property(_orig_config, OptionVisibility::RELEASE); + config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -357,9 +440,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -445,7 +527,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) { @@ -657,8 +739,10 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + auto config = m_configs_map.at(device_id); + config.set_property(ov::intel_gpu::partial_build_program(true)); + config.finalize(context, {}); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); @@ -775,7 +859,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { TransformationsPipeline transformations(config, context); transformations.apply(cloned_model); - program = std::make_shared(cloned_model, engine, config, true); + program = std::make_shared(cloned_model, engine, config); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index a9bb813d0ce587..449bd3d7d4fe0c 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" 
#include "openvino/op/split.hpp" @@ -10,7 +11,8 @@ #include "openvino/op/lstm_sequence.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" -#include "ov_ops/dynamic_quantize.hpp" +#include "openvino/runtime/properties.hpp" + #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -61,7 +63,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -105,20 +106,11 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); - } - - m_program = build(ops, partial_build, is_inner_program); + m_program = build(ops, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -148,24 +140,8 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer); - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); prepare_build(); { @@ -211,7 +187,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -268,7 +243,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. 
Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); @@ -309,7 +284,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +315,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.get_enable_profiling() && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } @@ -352,50 +327,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptradd_primitive(prim); } -bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr& op) const { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
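In GetInputInfo above, is_legacy_multiple_outputs controls whether a producer's outputs are addressed by the bare primitive id (legacy shape infer, or ops such as Split/VariadicSplit/LSTMCell that are lowered to per-output primitives) or by an explicit output port. A small sketch of that choice; the ".out" suffix is an illustrative assumption, not the exact cldnn id format:

#include <string>

static std::string input_pid(const std::string& producer_id, size_t port, bool legacy_multiple_outputs) {
    // Legacy mode: each logical output already has its own primitive (e.g. a crop),
    // so the bare producer id suffices; otherwise address the port explicitly.
    return legacy_multiple_outputs ? producer_id : producer_id + ".out" + std::to_string(port);
}

int main() {
    return input_pid("fc1", 1, false) == "fc1.out1" ? 0 : 1;
}
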
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index f87f9af5275722..46fe4acc2a9955 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -114,8 +114,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio())) + , m_enable_profiling(m_graph->get_config().get_enable_profiling()) , m_use_external_queue(m_graph->use_external_queue()) { GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { @@ -310,15 +310,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." 
<< std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); @@ -415,7 +416,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -888,7 +889,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 7b365ab7164ba7..12f8198750c4e4 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -17,7 +17,7 @@ namespace ov { namespace intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; @@ -56,7 +56,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym) { + if (asymmetric) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index b5d956f7872b5c..f56859d506a4e4 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -12,7 +12,7 @@ namespace intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: OPENVINO_RTTI("DynamicQuantizeFullyConnected", "0"); - DynamicQuantizeFullyConnected(uint64_t group_size); + 
DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 44d68740a0dfb7..61f8b838de363d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -282,7 +282,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const auto& defaultPrecisions = ov::pass::low_precision::precision_set::get_int8_support(); const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -295,7 +295,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -328,7 +328,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - auto infer_precision = config.get_property(ov::hint::inference_precision); + auto infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -409,7 +409,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = std::dynamic_pointer_cast(node); @@ -917,18 +917,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = false; - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) - disable_horizontal_fc_fusion = true; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion(); + bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion(); + // MLP fusion is only supported for cldnn on high-performance GPUs bool fuse_mlp_swiglu = !device_info.supports_immad && device_info.execution_units_count >= 128 && !disable_fc_swiglu_fusion; - if (!disable_horizontal_fc_fusion) + if (!config.get_disable_horizontal_fc_fusion()) manager.register_pass(fuse_mlp_swiglu); // ZP should not be folded for FC. But still, ZP should be folded for Gather.
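The fusion gating in the transformations pipeline above folds device capability and config flags into one predicate: SwiGLU/MLP fusion runs only on non-immad (cldnn) devices with enough execution units and with the config switch left on. A compilable sketch, with DeviceSketch standing in for the real device-info struct:

#include <cstdint>

struct DeviceSketch { bool supports_immad = false; uint32_t execution_units_count = 0; };

// MLP (SwiGLU) fusion targets the cldnn path on larger GPUs only.
static bool fuse_mlp_swiglu(const DeviceSketch& dev, bool disable_fc_swiglu_fusion) {
    return !dev.supports_immad && dev.execution_units_count >= 128 && !disable_fc_swiglu_fusion;
}

int main() {
    DeviceSketch dev{false, 128};
    return fuse_mlp_swiglu(dev, false) ? 0 : 1;
}
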
@@ -946,7 +942,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(device_info.supports_immad); - manager.register_pass(config.get_property(ov::hint::activations_scale_factor)); + manager.register_pass(config.get_activations_scale_factor()); if (!device_info.supports_immad) { manager.register_pass(); @@ -956,7 +952,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt); manager.register_pass(); @@ -981,7 +977,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization(); + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: input type is not supported" << std::endl; @@ -996,7 +993,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } // AZP does not support 8bit weight - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym + GPU_DEBUG_IF(asymmetric_dyn_quant && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; return true; @@ -1012,7 +1009,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } return false; }); - manager.register_pass(dynamic_quantization_group_size); + manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant); } // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero. diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index a698ec7eb6c5a0..4876af965991a8 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,149 +1,113 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" -#include namespace ov { namespace intel_gpu { -ExecutionConfig::ExecutionConfig() { - set_default(); +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION } -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + m_user_properties = other.m_user_properties; + m_is_finalized = false; // copy is not automatically finalized + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } -}; +} -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + m_user_properties = other.m_user_properties; + m_is_finalized = false; // copy is not automatically finalized + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } -}; - -void ExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, 0.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, 
false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), - std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); + return *this; } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx, {}); } -void ExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } -} - -bool ExecutionConfig::is_supported(const std::string& name) const { - bool supported = supported_properties.find(name) != supported_properties.end(); - bool has_validator = property_validators.find(name) != property_validators.end(); + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); - return supported && has_validator; + // WEIGHTS_PATH is used for the weightless cache mechanism, which is used only with the + // ov::CacheMode::OPTIMIZE_SIZE setting. If WEIGHTS_PATH is not set, that mechanism + // is not used.
+ if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); + } } -bool ExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); -} +void ExecutionConfig::finalize_impl(std::shared_ptr context) { + if (m_help) { + print_help(); + exit(-1); + } -void ExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + m_enable_lp_transformations = info.supports_imad || info.supports_immad; + } + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { + m_use_onednn = true; + } + if (get_use_onednn()) { + m_queue_type = QueueTypes::in_order; + } - user_properties[kv.first] = kv.second; + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + m_kv_cache_precision = ov::element::i8; } -} -Any ExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return user_properties.at(name); + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + m_dynamic_quantization_group_size = 32; } - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); + if (!get_force_implementations().empty()) { + m_optimize_data = true; + } +} + +void ExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); + const auto mode = get_execution_mode(); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); + m_inference_precision = ov::element::undefined; } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); + m_inference_precision = ov::element::f16; else - set_property(ov::hint::inference_precision(ov::element::f32)); + m_inference_precision = ov::element::f32; } } } @@ -151,136 +115,49 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); + const auto mode = get_performance_mode(); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { - 
set_property(ov::num_streams(1)); + m_num_streams = 1; } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); + m_num_streams = ov::streams::AUTO; } } } - if (get_property(ov::num_streams) == ov::streams::AUTO) { + if (get_num_streams() == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); + m_num_streams = n_streams; } - if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); + if (get_exclusive_async_requests()) { + m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { + m_enable_kernels_reuse = false; } } } void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); + const auto priority = get_model_priority(); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); + m_queue_priority = priority; } } } -void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } - - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); - } - - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); - } - - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - } - - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - } - - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - } - } -} - -void ExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); - apply_debug_options(info); -} - -void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. - // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. 
- for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; - } - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } - - user_properties.clear(); -} - -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -std::string ExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - return s.str(); +const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { + static ov::PluginConfig::OptionsDesc help_map { + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION + }; + return help_map; } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 74dbc016c65d31..4826e502aeaa97 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex GPU_DEBUG_INFO << "GPU version: " << static_cast(info.gfx_ver.major) << "." << static_cast(info.gfx_ver.minor) << "." << static_cast(info.gfx_ver.revision) << (info.has_separate_cache ? 
" with separate cache" : "") << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn) - info.supports_immad = false; } else if (nv_device_attr_supported) { info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()), diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index df1cad281d636c..6eb5855c29f9b5 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.get_property(ov::cache_dir); + std::string cache_dir = config.get_cache_dir(); if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index e227c94c7dc06d..bc01a8174292e4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config)) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_profiling(config.get_enable_profiling()); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions); + queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); + queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index d79a144c08ee86..d8a3e559db2c93 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.get_property(ov::enable_profiling); - auto queue_type = 
config.get_property(ov::intel_gpu::queue_type); + auto profiling = config.get_enable_profiling(); + auto queue_type = config.get_queue_type(); return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index 235853eaf79f60..528ed566524b4f 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTestget_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations)); + prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations()); program_wrapper::apply_opt_pass(*prog); ASSERT_TRUE(!has_node(*prog, "permute")); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1fbeab7e67ac2d..b46033f15d77db 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -136,7 +136,7 @@ TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad auto prog = program::build_program(engine, topology, config, false, true); config.set_property(ov::intel_gpu::optimize_data(true)); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) { auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); - bool opt_data = config.get_property(ov::intel_gpu::optimize_data); + bool opt_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog); program_wrapper::apply_opt_pass(*prog, opt_data); @@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto 
prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index f640b02afa99cb..8cb561f4232a6a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1422,7 +1422,7 @@ struct concat_gpu_4d_implicit : public concat_gpu { } auto outputs = concat_network->execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1642,7 +1642,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1805,7 +1805,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer. 
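The test migrations above all reduce to the same mechanical substitution; a hedged sketch of the resulting pattern, assuming get_test_default_config from the test utilities shown in this diff:

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    // The generated getter replaces config.get_property(ov::intel_gpu::optimize_data);
    // before finalization it reflects the user-set value.
    bool concat_opt_enabled = config.get_optimize_data();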
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 20d42e85d0c301..ad8cd9648cbc24 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1317,7 +1317,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) { } } } - config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); network network2(engine, topology, config2); // run with static kernel network2.set_input_data("input", input); auto outputs2 = network2.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index f59dc5c42cffc1..6bf44a31add0f4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + 
config.set_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + 
config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index e9d55960cf568f..ca33241e31d248 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); return program::build_program(engine, body_topology, config, false, false, true); @@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout, permute("permute1", input_info("input_origin"), {0, 1, 2, 3}), concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0), loop("loop", - {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, + {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, body_program, trip_count_id, initial_condition_id, actual_iteration_count_id, input_primitive_maps, output_primitive_maps, back_edges, num_iterations, body_current_iteration_id, body_execution_condition_id, 2), @@ -1105,7 +1104,7 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - + std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 8ade3b6c8e0f31..1fac766cd572ae 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order) + if (config.get_queue_type() != QueueTypes::out_of_order) GTEST_SKIP(); ASSERT_TRUE(executed_primitives.count("in") == 1); diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index 474182801dbfc2..ba129090c99ce9 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) { } } std::string input_name = "input" + std::to_string(i); - if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) && generic_params->network_config.get_optimize_data()) { // Add a reorder after the first input when the optimize_data flag is set, since it might change the input layout.
input_name = "input0_init"; } // First input is provided to the network as input_layout. // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) || !generic_params->network_config.get_optimize_data()) { topology.add(input_layout(input_name, input_mems[i]->get_layout())); input_layouts_names.push_back(input_name); } else { @@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) { } } - if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if (generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout())); }