Skip to content

Commit

Permalink
perf & fix: NN 算法移除 cls_size 字段需求,自适应模型输入尺寸
Browse files Browse the repository at this point in the history
fix #281
  • Loading branch information
MistEO committed Jul 15, 2024
1 parent d2c8154 commit cf56471
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 111 deletions.
8 changes: 0 additions & 8 deletions docs/en_us/3.1-PipelineProtocol.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,6 @@ This task property requires additional fields:
- `roi`: *array<int, 4>* | *list<array<int, 4>>*
Same as `TemplateMatch`.`roi`.

- `cls_size`: *int*
The total number of categories. Required.

- `labels`: *list<string, >*
Labels, meaning the names of each category. Optional.
It only affects debugging images and logs. If not provided, it is filled with "Unknown".
Expand All @@ -400,7 +397,6 @@ For example, if you want to recognize whether a cat or a mouse appears in a **fi
```jsonc
{
"cls_size": 3,
"labels": ["Cat", "Dog", "Mouse"],
"expected": [0, 2]
}
Expand All @@ -419,9 +415,6 @@ This task property requires additional fields:
- `roi`: *array<int, 4>* | *list<array<int, 4>>*
Same as `TemplateMatch`.`roi`.
- `cls_size`: *int*
The total number of categories. Required.
- `labels`: *list<string, >*
Labels, meaning the names of each category. Optional.
It only affects debugging images and logs. If not provided, it is filled with "Unknown".
Expand Down Expand Up @@ -450,7 +443,6 @@ For example, if you want to detect cats, dogs, and mice in an image and only cli

```jsonc
{
"cls_size": 3,
"labels": ["Cat", "Dog", "Mouse"],
"expected": [0, 2]
}
Expand Down
8 changes: 0 additions & 8 deletions docs/zh_cn/3.1-任务流水线协议.md
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,6 @@ graph LR;
- `roi`: *array<int, 4>* | *list<array<int, 4>>*
同 `TemplateMatch`.`roi`
- `cls_size`: *int*
总分类数,必选。
- `labels`: *list<string, >*
标注,即每个分类的名字。可选。
仅影响调试图片及日志等,若未填写则会填充 "Unknown"
Expand All @@ -409,7 +406,6 @@ graph LR;
```jsonc
{
"cls_size": 3,
"labels": ["Cat", "Dog", "Mouse"],
"expected": [0, 2]
}
Expand All @@ -428,9 +424,6 @@ graph LR;
- `roi`: *array<int, 4>* | *list<array<int, 4>>*
同 `TemplateMatch`.`roi`
- `cls_size`: *int*
总分类数,必选。
- `labels`: *list<string, >*
标注,即每个分类的名字。可选。
仅影响调试图片及日志等,若未填写则会填充 "Unknown"
Expand Down Expand Up @@ -460,7 +453,6 @@ graph LR;
```jsonc
{
"cls_size": 3,
"labels": ["Cat", "Dog", "Mouse"],
"expected": [0, 2]
}
Expand Down
22 changes: 0 additions & 22 deletions source/MaaFramework/Resource/PipelineResMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -858,21 +858,10 @@ bool PipelineResMgr::parse_nn_classifier_param(
return false;
}

if (!get_and_check_value(input, "cls_size", output.cls_size, default_value.cls_size)) {
LogError << "failed to get_and_check_value cls_size" << VAR(input);
return false;
}

if (!get_and_check_value_or_array(input, "labels", output.labels, default_value.labels)) {
LogError << "failed to get_and_check_value_or_array labels" << VAR(input);
return false;
}
if (output.labels.size() < output.cls_size) {
LogDebug << "labels.size() < cls_size, fill 'Unknown'" << VAR(output.labels.size())
<< VAR(output.cls_size);
output.labels.resize(output.cls_size, "Unknown");
}

if (!get_and_check_value(input, "model", output.model, default_value.model)) {
LogError << "failed to get_and_check_value model" << VAR(input);
return false;
Expand Down Expand Up @@ -913,21 +902,10 @@ bool PipelineResMgr::parse_nn_detector_param(
return false;
}

if (!get_and_check_value(input, "cls_size", output.cls_size, default_value.cls_size)) {
LogError << "failed to get_and_check_value cls_size" << VAR(input);
return false;
}

if (!get_and_check_value_or_array(input, "labels", output.labels, default_value.labels)) {
LogError << "failed to get_and_check_value_or_array labels" << VAR(input);
return false;
}
if (output.labels.size() < output.cls_size) {
LogDebug << "labels.size() < cls_size, fill 'Unknown'" << VAR(output.labels.size())
<< VAR(output.cls_size);
output.labels.resize(output.cls_size, "Unknown");
}

if (!get_and_check_value(input, "model", output.model, default_value.model)) {
LogError << "failed to get_and_check_value model" << VAR(input);
return false;
Expand Down
70 changes: 31 additions & 39 deletions source/MaaFramework/Vision/NeuralNetworkClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,6 @@ void NeuralNetworkClassifier::analyze()
LogError << "OrtSession not loaded";
return;
}
if (param_.cls_size == 0) {
LogError << "cls_size == 0";
return;
}
if (param_.cls_size != param_.labels.size()) {
LogError << "cls_size != labels.size()" << VAR(param_.cls_size)
<< VAR(param_.labels.size());
return;
}

auto start_time = std::chrono::steady_clock::now();

auto results = classify_all_rois();
Expand Down Expand Up @@ -71,14 +61,22 @@ NeuralNetworkClassifier::Result NeuralNetworkClassifier::classify(const cv::Rect
LogError << "OrtSession not loaded";
return {};
}
// batch_size, channel, height, width
// for yolov8, input_shape is { 1, 3, 640, 640 }
const auto input_shape = session_->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
if (input_shape.size() != 4) {
LogError << "Input shape is not 4" << VAR(input_shape);
return {};
}

cv::Mat image = image_with_roi(roi);
cv::Size raw_roi_size(image.cols, image.rows);
cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
std::vector<float> input = image_to_tensor(image);

// TODO: GPU
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
constexpr int64_t kBatchSize = 1;
std::array<int64_t, 4> input_shape { kBatchSize, image.channels(), image.cols, image.rows };

Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
memory_info,
Expand All @@ -87,47 +85,41 @@ NeuralNetworkClassifier::Result NeuralNetworkClassifier::classify(const cv::Rect
input_shape.data(),
input_shape.size());

std::vector<float> output;
output.resize(param_.cls_size);
std::array<int64_t, 2> output_shape { kBatchSize, static_cast<int64_t>(param_.cls_size) };
Ort::Value output_tensor = Ort::Value::CreateTensor<float>(
memory_info,
output.data(),
output.size(),
output_shape.data(),
output_shape.size());

Ort::AllocatorWithDefaultOptions allocator;
const std::string in_0 = session_->GetInputNameAllocated(0, allocator).get();
const std::string out_0 = session_->GetOutputNameAllocated(0, allocator).get();
const std::vector input_names { in_0.c_str() };
const std::vector output_names { out_0.c_str() };

Ort::RunOptions run_options;
session_->Run(
auto output_tensor = session_->Run(
run_options,
input_names.data(),
&input_tensor,
1,
input_names.size(),
output_names.data(),
&output_tensor,
1);

Result result;
result.raw = std::move(output);
result.probs = softmax(result.raw);
result.cls_index =
std::max_element(result.probs.begin(), result.probs.end()) - result.probs.begin();
result.score = result.probs[result.cls_index];
result.label = param_.labels[result.cls_index];
result.box = roi;
output_names.size());

const float* raw_output = output_tensor[0].GetTensorData<float>();
std::vector<float> output(
raw_output,
raw_output + output_tensor[0].GetTensorTypeAndShapeInfo().GetElementCount());

Result res;
res.raw = std::move(output);
res.probs = softmax(res.raw);
res.cls_index = std::max_element(res.probs.begin(), res.probs.end()) - res.probs.begin();
res.score = res.probs[res.cls_index];
res.label = res.cls_index < param_.labels.size() ? param_.labels[res.cls_index]
: std::format("Unkonwn_{}", res.cls_index);
res.box = roi;

if (debug_draw_) {
auto draw = draw_result(result);
auto draw = draw_result(res);
handle_draw(draw);
}

return result;
return res;
}

void NeuralNetworkClassifier::add_results(ResultsVec results, const std::vector<size_t>& expected)
Expand All @@ -154,7 +146,7 @@ cv::Mat NeuralNetworkClassifier::draw_result(const Result& res) const
cv::Mat image_draw = draw_roi(res.box);
cv::Point pt(res.box.x + res.box.width + 5, res.box.y + 20);

for (size_t i = 0; i != param_.cls_size; ++i) {
for (size_t i = 0; i != res.raw.size(); ++i) {
const auto color = i == res.cls_index ? cv::Scalar(0, 0, 255) : cv::Scalar(255, 0, 0);
std::string text = std::format(
"{} {}: prob {:.3f}, raw {:.3f}",
Expand Down Expand Up @@ -193,4 +185,4 @@ void NeuralNetworkClassifier::sort_(ResultsVec& results) const
}
}

MAA_VISION_NS_END
MAA_VISION_NS_END
54 changes: 29 additions & 25 deletions source/MaaFramework/Vision/NeuralNetworkDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,6 @@ void NeuralNetworkDetector::analyze()
LogError << "OrtSession not loaded";
return;
}
if (param_.cls_size == 0) {
LogError << "cls_size == 0";
return;
}
if (param_.cls_size != param_.labels.size()) {
LogError << "cls_size != labels.size()" << VAR(param_.cls_size)
<< VAR(param_.labels.size());
return;
}

auto start_time = std::chrono::steady_clock::now();

Expand Down Expand Up @@ -73,13 +64,22 @@ NeuralNetworkDetector::ResultsVec NeuralNetworkDetector::detect(const cv::Rect&
return {};
}

// batch_size, channel, height, width
// for yolov8, input_shape is { 1, 3, 640, 640 }
const auto input_shape = session_->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
if (input_shape.size() != 4) {
LogError << "Input shape is not 4" << VAR(input_shape);
return {};
}

cv::Mat image = image_with_roi(roi);
cv::Size raw_roi_size(image.cols, image.rows);
cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
std::vector<float> input = image_to_tensor(image);

// TODO: GPU
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
constexpr int64_t kBatchSize = 1;
std::array<int64_t, 4> input_shape { kBatchSize, image.channels(), image.cols, image.rows };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
memory_info,
Expand Down Expand Up @@ -124,12 +124,9 @@ NeuralNetworkDetector::ResultsVec NeuralNetworkDetector::detect(const cv::Rect&
raw_output + (i + 1) * output_shape[2]);
}

ResultsVec all_nms_results;

const size_t output_size = output.back().size();
ResultsVec raw_results;
for (size_t i = 0; i < output_size; ++i) {
ResultsVec raw_results;

constexpr size_t kConfidenceIndex = 4;
for (size_t j = kConfidenceIndex; j < output.size(); ++j) {
float score = output[j][i];
Expand All @@ -149,25 +146,32 @@ NeuralNetworkDetector::ResultsVec NeuralNetworkDetector::detect(const cv::Rect&

Result res;
res.cls_index = j - kConfidenceIndex;
res.label = param_.labels[res.cls_index];
res.label = res.cls_index < param_.labels.size()
? param_.labels[res.cls_index]
: std::format("Unkonwn_{}", res.cls_index);
res.box = box;
res.score = score;

raw_results.emplace_back(std::move(res));
}
auto nms_results = NMS(std::move(raw_results));
all_nms_results.insert(
all_nms_results.end(),
std::make_move_iterator(nms_results.begin()),
std::make_move_iterator(nms_results.end()));
}

auto nms_results = NMS(std::move(raw_results));

// post process
for (Result& res : nms_results) {
res.box.x = res.box.x * raw_roi_size.width / input_image_size.width + roi.x;
res.box.y = res.box.y * raw_roi_size.height / input_image_size.height + roi.y;
res.box.width = res.box.width * raw_roi_size.width / input_image_size.width;
res.box.height = res.box.height * raw_roi_size.height / input_image_size.height;
}

if (debug_draw_) {
auto draw = draw_result(roi, all_nms_results);
auto draw = draw_result(roi, nms_results);
handle_draw(draw);
}

return all_nms_results;
return nms_results;
}

void NeuralNetworkDetector::add_results(ResultsVec results, const std::vector<size_t>& expected)
Expand Down Expand Up @@ -244,4 +248,4 @@ void NeuralNetworkDetector::sort_(ResultsVec& results) const
}
}

MAA_VISION_NS_END
MAA_VISION_NS_END
2 changes: 0 additions & 2 deletions source/MaaFramework/Vision/VisionTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ struct CustomRecognizerParam

struct NeuralNetworkClassifierParam
{
size_t cls_size = 0;
std::vector<std::string> labels; // only for output and debug
std::string model;

Expand All @@ -90,7 +89,6 @@ struct NeuralNetworkDetectorParam
inline static constexpr Net kDefaultNet = Net::YoloV8;
inline static constexpr double kDefaultThreshold = 0.3;

size_t cls_size = 0;
std::vector<std::string> labels; // only for output and debug
std::string model;
Net net = kDefaultNet;
Expand Down
7 changes: 0 additions & 7 deletions tools/pipeline.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,6 @@
"default": false
},
"model": {},
"cls_size": {
"description": "总分类数,必选。",
"type": "integer",
"default": 2
},
"labels": {
"description": "标注,即每个分类的名字。可选。",
"type": "array",
Expand Down Expand Up @@ -409,7 +404,6 @@
}
},
"required": [
"cls_size",
"model",
"expected"
]
Expand Down Expand Up @@ -447,7 +441,6 @@
}
},
"required": [
"cls_size",
"model",
"expected"
]
Expand Down

0 comments on commit cf56471

Please sign in to comment.