diff --git a/include/MaaFramework/MaaDef.h b/include/MaaFramework/MaaDef.h
index a311208d1..44773fa60 100644
--- a/include/MaaFramework/MaaDef.h
+++ b/include/MaaFramework/MaaDef.h
@@ -111,6 +111,12 @@ typedef MaaOption MaaResOption;
 enum MaaResOptionEnum
 {
     MaaResOption_Invalid = 0,
+
+    /// Use the specified inference device; the default is INT32_MAX, which means CPU.
+    /// Please set this option before loading the model.
+    ///
+    /// value: int32_t, e.g. 0; val_size: sizeof(int32_t)
+    MaaResOption_GpuId = 1,
 };
 
 typedef MaaOption MaaCtrlOption;
diff --git a/source/MaaFramework/Resource/OCRResMgr.cpp b/source/MaaFramework/Resource/OCRResMgr.cpp
index 6d8c8da25..9dec3621d 100644
--- a/source/MaaFramework/Resource/OCRResMgr.cpp
+++ b/source/MaaFramework/Resource/OCRResMgr.cpp
@@ -15,6 +15,22 @@ OCRResMgr::OCRResMgr()
     option_.UseOrtBackend();
 }
 
+bool OCRResMgr::use_cpu()
+{
+    LogInfo;
+
+    option_.UseCpu();
+    return true;
+}
+
+bool OCRResMgr::use_gpu(int device_id)
+{
+    LogInfo << VAR(device_id);
+
+    option_.UseGpu(device_id);
+    return true;
+}
+
 bool OCRResMgr::lazy_load(const std::filesystem::path& path, bool is_base)
 {
     LogFunc << VAR(path) << VAR(is_base);
diff --git a/source/MaaFramework/Resource/OCRResMgr.h b/source/MaaFramework/Resource/OCRResMgr.h
index 1e2ac22b0..490c3d419 100644
--- a/source/MaaFramework/Resource/OCRResMgr.h
+++ b/source/MaaFramework/Resource/OCRResMgr.h
@@ -19,6 +19,9 @@ class OCRResMgr : public NonCopyable
 {
 public:
     OCRResMgr();
+
+    bool use_cpu();
+    bool use_gpu(int device_id);
 
     bool lazy_load(const std::filesystem::path& path, bool is_base);
     void clear();
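From the API consumer's side, the new enum value travels through the existing MaaResourceSetOption entry point (whose argtypes are registered in the Python binding further down). A minimal caller sketch; the umbrella header name is an assumption, the handle and option types come from MaaDef.h:

    #include <MaaFramework/MaaAPI.h> // assumed umbrella header

    #include <cstdint>

    // Sketch: pick the inference device before any model is loaded.
    // INT32_MAX selects the CPU path; any other value is taken as a GPU id.
    bool select_device(MaaResource* resource_handle, int32_t gpu_id)
    {
        return MaaResourceSetOption(
            resource_handle, MaaResOption_GpuId, &gpu_id, sizeof(gpu_id));
    }
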
diff --git a/source/MaaFramework/Resource/ONNXResMgr.cpp b/source/MaaFramework/Resource/ONNXResMgr.cpp
index a61137116..f81996f79 100644
--- a/source/MaaFramework/Resource/ONNXResMgr.cpp
+++ b/source/MaaFramework/Resource/ONNXResMgr.cpp
@@ -2,12 +2,109 @@
 
 #include <filesystem>
 #include <ranges>
+#include <unordered_set>
+
+#ifdef _WIN32
+#include "Utils/SafeWindows.hpp"
+#endif
+
+#if __has_include(<onnxruntime/dml_provider_factory.h>)
+#define MAA_WITH_DML
+#include <onnxruntime/dml_provider_factory.h>
+#endif
+
+#if __has_include(<onnxruntime/coreml_provider_factory.h>)
+#define MAA_WITH_COREML
+#include <onnxruntime/coreml_provider_factory.h>
+#endif
 
 #include "Utils/Logger.h"
 #include "Utils/Platform.h"
 
 MAA_RES_NS_BEGIN
 
+ONNXResMgr::~ONNXResMgr()
+{
+    if (gpu_device_id_) {
+        LogWarn << "GPU is enabled, leaking resources";
+
+        // FIXME: intentionally leak ort objects to avoid crash (double free?)
+        // https://github.com/microsoft/onnxruntime/issues/15174
+        for (auto& session : classifiers_ | std::views::values) {
+            auto leak_session = new Ort::Session(nullptr);
+            *leak_session = std::move(*session);
+        }
+        for (auto& session : detectors_ | std::views::values) {
+            auto leak_session = new Ort::Session(nullptr);
+            *leak_session = std::move(*session);
+        }
+
+        auto leak_options = new Ort::SessionOptions(nullptr);
+        *leak_options = std::move(options_);
+    }
+}
+
+bool ONNXResMgr::use_cpu()
+{
+    LogInfo;
+
+    options_ = {};
+    gpu_device_id_ = std::nullopt;
+    return true;
+}
+
+bool ONNXResMgr::use_gpu(int device_id)
+{
+    LogInfo << VAR(device_id);
+
+    if (gpu_device_id_ && *gpu_device_id_ == device_id) {
+        LogWarn << "GPU is already enabled";
+        return true;
+    }
+    options_ = {};
+
+    auto all_providers_vec = Ort::GetAvailableProviders();
+    std::unordered_set<std::string> all_providers(
+        std::make_move_iterator(all_providers_vec.begin()),
+        std::make_move_iterator(all_providers_vec.end()));
+    LogInfo << VAR(all_providers);
+
+    if (all_providers.contains("CUDAExecutionProvider")) {
+        OrtCUDAProviderOptions cuda_options {};
+        cuda_options.device_id = device_id;
+        options_.AppendExecutionProvider_CUDA(cuda_options);
+
+        LogInfo << "Using CUDA execution provider with device_id " << device_id;
+    }
+#ifdef MAA_WITH_DML
+    else if (all_providers.contains("DmlExecutionProvider")) {
+        auto status = OrtSessionOptionsAppendExecutionProvider_DML(options_, device_id);
+        if (!Ort::Status(status).IsOK()) {
+            LogError << "Failed to append DML execution provider with device_id " << device_id;
+            return false;
+        }
+        LogInfo << "Using DML execution provider with device_id " << device_id;
+    }
+#endif
+#ifdef MAA_WITH_COREML
+    else if (all_providers.contains("CoreMLExecutionProvider")) {
+        auto status = OrtSessionOptionsAppendExecutionProvider_CoreML((OrtSessionOptions*)options_, 0);
+        if (!Ort::Status(status).IsOK()) {
+            LogError << "Failed to append CoreML execution provider";
+            return false;
+        }
+        LogInfo << "Using CoreML execution provider";
+    }
+#endif
+    else {
+        LogError << "No supported execution provider found";
+        return false;
+    }
+
+    gpu_device_id_ = device_id;
+    return true;
+}
+
 bool ONNXResMgr::lazy_load(const std::filesystem::path& path, bool is_base)
 {
     LogFunc << VAR(path) << VAR(is_base);
@@ -71,7 +168,7 @@ std::shared_ptr<Ort::Session> ONNXResMgr::load(const std::string& name, const st
     }
     LogTrace << VAR(path);
 
-    Ort::Session session(m_env, path.c_str(), m_options);
+    Ort::Session session(env_, path.c_str(), options_);
 
     return std::make_shared<Ort::Session>(std::move(session));
 }
diff --git a/source/MaaFramework/Resource/ONNXResMgr.h b/source/MaaFramework/Resource/ONNXResMgr.h
index 343bf06ad..36e912a7a 100644
--- a/source/MaaFramework/Resource/ONNXResMgr.h
+++ b/source/MaaFramework/Resource/ONNXResMgr.h
@@ -2,6 +2,7 @@
 
 #include <filesystem>
 #include <unordered_map>
+#include <optional>
 
 #include <onnxruntime/onnxruntime_cxx_api.h>
@@ -17,7 +18,12 @@ class ONNXResMgr : public NonCopyable
     inline static const std::filesystem::path kClassifierDir = "classify";
     inline static const std::filesystem::path kDetectorDir = "detect";
 
+    ~ONNXResMgr();
+
 public:
+    bool use_cpu();
+    bool use_gpu(int device_id);
+
     bool lazy_load(const std::filesystem::path& path, bool is_base);
     void clear();
@@ -31,8 +37,9 @@ class ONNXResMgr : public NonCopyable
     std::vector<std::filesystem::path> classifier_roots_;
     std::vector<std::filesystem::path> detector_roots_;
 
-    Ort::Env m_env;
-    Ort::SessionOptions m_options;
+    Ort::Env env_;
+    Ort::SessionOptions options_;
+    std::optional<int> gpu_device_id_;
 
     mutable std::unordered_map<std::string, std::shared_ptr<Ort::Session>> classifiers_;
     mutable std::unordered_map<std::string, std::shared_ptr<Ort::Session>> detectors_;
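use_gpu() keys everything off Ort::GetAvailableProviders(), preferring CUDA and falling back to the platform-gated DML/CoreML branches, while the destructor's deliberate leak sidesteps the onnxruntime shutdown crash linked in the FIXME. A standalone probe of the same provider query, assuming the onnxruntime C++ headers are on the include path:

    #include <onnxruntime_cxx_api.h>

    #include <iostream>
    #include <string>

    // Sketch: print the execution providers this onnxruntime build exposes,
    // e.g. "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider".
    int main()
    {
        for (const std::string& provider : Ort::GetAvailableProviders()) {
            std::cout << provider << '\n';
        }
    }
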
diff --git a/source/MaaFramework/Resource/ResourceMgr.cpp b/source/MaaFramework/Resource/ResourceMgr.cpp
index d054454f4..a8d15b1b5 100644
--- a/source/MaaFramework/Resource/ResourceMgr.cpp
+++ b/source/MaaFramework/Resource/ResourceMgr.cpp
@@ -28,11 +28,16 @@ ResourceMgr::~ResourceMgr()
 
 bool ResourceMgr::set_option(MaaResOption key, MaaOptionValue value, MaaOptionValueSize val_size)
 {
-    std::ignore = key;
-    std::ignore = value;
-    std::ignore = val_size;
+    LogFunc << VAR(key) << VAR_VOIDP(value) << VAR(val_size);
 
-    return false;
+    switch (key) {
+    case MaaResOption_GpuId:
+        return set_gpu_id(value, val_size);
+
+    default:
+        LogError << "Unknown key" << VAR(key) << VAR(value);
+        return false;
+    }
 }
 
 MaaResId ResourceMgr::post_path(const std::filesystem::path& path)
@@ -243,6 +248,30 @@ CustomActionSession ResourceMgr::custom_action(const std::string& name) const
     return it->second;
 }
 
+bool ResourceMgr::set_gpu_id(MaaOptionValue value, MaaOptionValueSize val_size)
+{
+    LogFunc << VAR_VOIDP(value) << VAR(val_size);
+
+    if (val_size != sizeof(int32_t)) {
+        LogError << "invalid size" << VAR(val_size);
+        return false;
+    }
+
+    int32_t gpu_id = *reinterpret_cast<int32_t*>(value);
+    LogInfo << VAR(gpu_id);
+
+    if (gpu_id == INT32_MAX) {
+        onnx_res_.use_cpu();
+        ocr_res_.use_cpu();
+    }
+    else {
+        onnx_res_.use_gpu(gpu_id);
+        ocr_res_.use_gpu(gpu_id);
+    }
+
+    return true;
+}
+
 bool ResourceMgr::run_load(typename AsyncRunner<std::filesystem::path>::Id id, std::filesystem::path path)
 {
     LogFunc << VAR(id) << VAR(path);
diff --git a/source/MaaFramework/Resource/ResourceMgr.h b/source/MaaFramework/Resource/ResourceMgr.h
index 688aeeef6..6f9479fde 100644
--- a/source/MaaFramework/Resource/ResourceMgr.h
+++ b/source/MaaFramework/Resource/ResourceMgr.h
@@ -78,6 +78,8 @@ class ResourceMgr : public MaaResource
     CustomActionSession custom_action(const std::string& name) const;
 
 private:
+    bool set_gpu_id(MaaOptionValue value, MaaOptionValueSize val_size);
+
     bool run_load(typename AsyncRunner<std::filesystem::path>::Id id, std::filesystem::path path);
     bool load(const std::filesystem::path& path);
     bool check_stop();
diff --git a/source/MaaFramework/Vision/NeuralNetworkClassifier.cpp b/source/MaaFramework/Vision/NeuralNetworkClassifier.cpp
index 0cdb0393a..47d2fbaf8 100644
--- a/source/MaaFramework/Vision/NeuralNetworkClassifier.cpp
+++ b/source/MaaFramework/Vision/NeuralNetworkClassifier.cpp
@@ -59,8 +59,6 @@ NeuralNetworkClassifier::Result NeuralNetworkClassifier::classify() const
     cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
     cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
     std::vector<float> input = image_to_tensor(image);
-
-    // TODO: GPU
 
     auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
     Ort::Value input_tensor =
diff --git a/source/MaaFramework/Vision/NeuralNetworkDetector.cpp b/source/MaaFramework/Vision/NeuralNetworkDetector.cpp
index 70d8066cd..40a1bd632 100644
--- a/source/MaaFramework/Vision/NeuralNetworkDetector.cpp
+++ b/source/MaaFramework/Vision/NeuralNetworkDetector.cpp
@@ -62,8 +62,6 @@ NeuralNetworkDetector::ResultsVec NeuralNetworkDetector::detect() const
     cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
     cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
     std::vector<float> input = image_to_tensor(image);
-
-    // TODO: GPU
 
     auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
     Ort::Value input_tensor =
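The two "TODO: GPU" removals above are deliberate: the vision code keeps allocating its input tensors on the CPU, and onnxruntime transfers the data to whichever execution provider the session was built with, so no tensor-side change is needed. A condensed sketch of that unchanged setup, with a hypothetical NCHW shape:

    #include <onnxruntime_cxx_api.h>

    #include <array>
    #include <cstdint>
    #include <vector>

    // Sketch: a CPU-backed float tensor; Run() moves the data to the GPU
    // (or keeps it on the CPU) according to the session's provider.
    Ort::Value make_input_tensor(std::vector<float>& input)
    {
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
        std::array<int64_t, 4> shape { 1, 3, 224, 224 }; // hypothetical shape
        return Ort::Value::CreateTensor<float>(
            memory_info, input.data(), input.size(), shape.data(), shape.size());
    }
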
diff --git a/source/MaaFramework/Vision/OCRer.cpp b/source/MaaFramework/Vision/OCRer.cpp
index ca52afa4a..2304d6e7a 100644
--- a/source/MaaFramework/Vision/OCRer.cpp
+++ b/source/MaaFramework/Vision/OCRer.cpp
@@ -85,7 +85,7 @@ OCRer::ResultsVec OCRer::predict_det_and_rec(const cv::Mat& image_roi) const
     fastdeploy::vision::OCRResult ocr_result;
     bool ret = ocrer_->Predict(image_roi, &ocr_result);
     if (!ret) {
-        LogWarn << "inferencer return false" << VAR(ocrer_) << VAR(image_) << VAR(image_roi);
+        LogWarn << "Predict returned false" << VAR(ocrer_) << VAR(image_) << VAR(image_roi);
         return {};
     }
diff --git a/source/MaaProjectInterface/Impl/Configurator.cpp b/source/MaaProjectInterface/Impl/Configurator.cpp
index c919af763..e16bd03aa 100644
--- a/source/MaaProjectInterface/Impl/Configurator.cpp
+++ b/source/MaaProjectInterface/Impl/Configurator.cpp
@@ -120,6 +120,8 @@ std::optional<RuntimeParam> Configurator::generate_runtime() const
         }
     }
 
+    runtime.gpu = config_.gpu;
+
     return runtime;
 }
diff --git a/source/MaaProjectInterface/Impl/Runner.cpp b/source/MaaProjectInterface/Impl/Runner.cpp
index 3e676c8be..255ba1cf2 100644
--- a/source/MaaProjectInterface/Impl/Runner.cpp
+++ b/source/MaaProjectInterface/Impl/Runner.cpp
@@ -44,6 +44,7 @@ bool Runner::run(
     }
 
     auto resource_handle = MaaResourceCreate(notify, notify_trans_arg);
+    MaaResourceSetOption(resource_handle, MaaResOption_GpuId, const_cast<int32_t*>(&param.gpu), sizeof(int32_t));
 
     MaaId cid = MaaControllerPostConnection(controller_handle);
     MaaId rid = 0;
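On the project-interface side, the device id travels config -> Configurator -> RuntimeParam -> MaaResourceSetOption. With the MEO_OPT gpu field added to Configuration in Types.h below, a hypothetical interface config (inner objects abridged, field names from the MEO_JSONIZATION list) would enable GPU 0 with one extra key; omitting "gpu" keeps the INT32_MAX default, i.e. CPU inference:

    {
        "controller": { "name": "adb" },
        "resource": "default",
        "task": [ { "name": "MyTask" } ],
        "gpu": 0
    }
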
diff --git a/source/binding/Python/maa/define.py b/source/binding/Python/maa/define.py
index 66bb348fe..e4a4e6574 100644
--- a/source/binding/Python/maa/define.py
+++ b/source/binding/Python/maa/define.py
@@ -49,6 +49,7 @@ class MaaStatusEnum(IntEnum):
 MaaOption = ctypes.c_int32
 MaaGlobalOption = MaaOption
 MaaCtrlOption = MaaOption
+MaaResOption = MaaOption
 
 
 class MaaGlobalOptionEnum:
@@ -104,6 +105,15 @@ class MaaCtrlOptionEnum:
     Recording = 5
 
 
+class MaaResOptionEnum:
+    Invalid = 0
+
+    # Use the specified inference device; the default is INT32_MAX, which means CPU.
+    # Please set this option before loading the model.
+    # value: int32_t, e.g. 0; val_size: sizeof(int32_t)
+    GpuId = 1
+
+
 MaaAdbScreencapMethod = ctypes.c_uint64
diff --git a/source/binding/Python/maa/resource.py b/source/binding/Python/maa/resource.py
index 51d0d01db..c38a241b5 100644
--- a/source/binding/Python/maa/resource.py
+++ b/source/binding/Python/maa/resource.py
@@ -60,6 +60,21 @@ def loaded(self) -> bool:
     def clear(self) -> bool:
         return bool(Library.framework.MaaResourceClear(self._handle))
 
+    def set_gpu(self, device_id: int) -> bool:
+        cint = ctypes.c_int32(device_id)
+        return bool(
+            Library.framework.MaaResourceSetOption(
+                self._handle,
+                MaaResOptionEnum.GpuId,
+                ctypes.pointer(cint),
+                ctypes.sizeof(ctypes.c_int32),
+            )
+        )
+
+    def set_cpu(self) -> bool:
+        INT32_MAX = 2147483647  # means CPU
+        return self.set_gpu(INT32_MAX)
+
     def register_custom_recognition(
         self, name: str, recognition: "CustomRecognition"  # type: ignore
     ) -> bool:
@@ -189,6 +204,14 @@ def _set_api_properties():
         MaaStringBufferHandle,
     ]
 
+    Library.framework.MaaResourceSetOption.restype = MaaBool
+    Library.framework.MaaResourceSetOption.argtypes = [
+        MaaResourceHandle,
+        MaaResOption,
+        MaaOptionValue,
+        MaaOptionValueSize,
+    ]
+
     Library.framework.MaaResourceRegisterCustomRecognition.restype = MaaBool
     Library.framework.MaaResourceRegisterCustomRecognition.argtypes = [
         MaaResourceHandle,
diff --git a/source/include/ProjectInterface/Types.h b/source/include/ProjectInterface/Types.h
index 3ed04f984..9c54dd0d9 100644
--- a/source/include/ProjectInterface/Types.h
+++ b/source/include/ProjectInterface/Types.h
@@ -150,8 +150,9 @@ struct Configuration
     Win32Config win32;
     std::string resource;
     std::vector<Task> task;
+    int32_t gpu = INT32_MAX;
 
-    MEO_JSONIZATION(controller, MEO_OPT adb, MEO_OPT win32, resource, task);
+    MEO_JSONIZATION(controller, MEO_OPT adb, MEO_OPT win32, resource, task, MEO_OPT gpu);
 };
 
 struct RuntimeParam
@@ -184,6 +185,7 @@ struct RuntimeParam
 
     std::vector<std::filesystem::path> resource_path;
     std::vector<Task> task;
+    int32_t gpu = INT32_MAX;
 };
 
 struct CustomRecognitionSession
diff --git a/test/python/binding_test.py b/test/python/binding_test.py
index e56c89905..a3edce16a 100644
--- a/test/python/binding_test.py
+++ b/test/python/binding_test.py
@@ -108,7 +108,10 @@ def run(
 
 def api_test():
     r1 = Resource()
+    r1.set_gpu(0)
+    r1.set_gpu(1)
     r2 = Resource()
+    r2.set_cpu()
     r2.post_path("C:/_maafw_testing_/aaabbbccc").wait()
     t1 = Tasker()
     t2 = Tasker()
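For binding users, the two new Resource methods wrap the same option end to end; a short usage sketch in Python (the import path is an assumption based on this repo's package layout, and the resource path is a placeholder):

    from maa.resource import Resource

    # Hypothetical flow: try GPU 0, fall back to CPU; set_cpu() just sends
    # the INT32_MAX sentinel through the same MaaResourceSetOption call.
    res = Resource()
    if not res.set_gpu(0):
        res.set_cpu()
    res.post_path("C:/my_resource").wait()  # pick the device before loading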