Merge pull request #26 from liwuhen/feat-dev
update trt, decode modules
liwuhen authored Jan 21, 2025
2 parents 2e59b72 + 88cf368 commit 42ca24b
Showing 14 changed files with 208 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/linters.yml
@@ -49,6 +49,8 @@ jobs:
-build/include,\
-build/namespaces,\
-build/header_guard,\
-whitespace/comma, \
-whitespace/comments, \
-whitespace/line_length,\
-whitespace/indent_namespace,\
-runtime/string" # Ignore runtime checks on string usage.
34 changes: 19 additions & 15 deletions modules/app_yolo/architecture/common/appconfig.cpp
@@ -29,7 +29,9 @@ int AppConfig::dst_img_w_;
int AppConfig::dst_img_h_;
int AppConfig::dst_img_c_;
int AppConfig::model_acc_;
int AppConfig::branch_num_;
int AppConfig::batchsizes_;
int AppConfig::decode_type_;
int AppConfig::max_objects_;
float AppConfig::obj_threshold_;
float AppConfig::nms_threshold_;
@@ -81,25 +83,27 @@ AppConfig::AppConfig(const std::string& config_filename) : config_filename_(conf
return;
}

src_img_w_ = yaml_node_["preprocessor_config"]["src_img_width"].as<int>();
src_img_h_ = yaml_node_["preprocessor_config"]["src_img_height"].as<int>();
src_img_c_ = yaml_node_["preprocessor_config"]["src_img_channel"].as<int>();
dst_img_w_ = yaml_node_["preprocessor_config"]["dst_img_width"].as<int>();
dst_img_h_ = yaml_node_["preprocessor_config"]["dst_img_height"].as<int>();
dst_img_c_ = yaml_node_["preprocessor_config"]["dst_img_channel"].as<int>();
batchsizes_ = yaml_node_["preprocessor_config"]["batch_size"].as<int>();
src_img_w_ = yaml_node_["preprocessor_config"]["src_img_width"].as<int>();
src_img_h_ = yaml_node_["preprocessor_config"]["src_img_height"].as<int>();
src_img_c_ = yaml_node_["preprocessor_config"]["src_img_channel"].as<int>();
dst_img_w_ = yaml_node_["preprocessor_config"]["dst_img_width"].as<int>();
dst_img_h_ = yaml_node_["preprocessor_config"]["dst_img_height"].as<int>();
dst_img_c_ = yaml_node_["preprocessor_config"]["dst_img_channel"].as<int>();
batchsizes_ = yaml_node_["preprocessor_config"]["batch_size"].as<int>();
branch_num_ = yaml_node_["predict_config"]["branch_num"].as<int>();
predict_dim_ = yaml_node_["predict_config"]["predict_dim"].as<std::vector<int>>();
decode_type_ = yaml_node_["predict_config"]["decode_type"].as<int>();
max_objects_ = yaml_node_["predict_config"]["max_objects"].as<int>();
obj_threshold_ = yaml_node_["predict_config"]["obj_threshold"].as<float>();
nms_threshold_ = yaml_node_["predict_config"]["nms_threshold"].as<float>();
img_path_ = yaml_node_["inference_config"]["offline_test"]["img_path"].as<std::string>();
save_img_ = yaml_node_["inference_config"]["offline_test"]["save_img"].as<std::string>();
trt_path_ = yaml_node_["inference_config"]["engine_path"].as<std::string>();
onnx_path_ = yaml_node_["inference_config"]["onnx_path"].as<std::string>();
model_acc_ = yaml_node_["inference_config"]["model_acc"].as<int>();
predict_path_ = yaml_node_["inference_config"]["predict_path"].as<std::string>();
log_path_ = yaml_node_["common_config"]["log_path"].as<std::string>();
imgs_path_ = yaml_node_["common_config"]["imgs_path"].as<std::string>();
img_path_ = yaml_node_["inference_config"]["offline_test"]["img_path"].as<std::string>();
save_img_ = yaml_node_["inference_config"]["offline_test"]["save_img"].as<std::string>();
trt_path_ = yaml_node_["inference_config"]["engine_path"].as<std::string>();
onnx_path_ = yaml_node_["inference_config"]["onnx_path"].as<std::string>();
model_acc_ = yaml_node_["inference_config"]["model_acc"].as<int>();
predict_path_ = yaml_node_["inference_config"]["predict_path"].as<std::string>();
log_path_ = yaml_node_["common_config"]["log_path"].as<std::string>();
imgs_path_ = yaml_node_["common_config"]["imgs_path"].as<std::string>();

if (trt_path_ == "") {
throw std::invalid_argument("engine_path is empty");
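The constructor above now also reads `branch_num` and `decode_type` from `predict_config`. A minimal sketch of the YAML layout these accessor paths imply — the keys follow the lookups in the code, while all values shown are placeholders rather than the repository's actual configuration:

```yaml
preprocessor_config:
  src_img_width: 1920        # source image size (placeholder values)
  src_img_height: 1080
  src_img_channel: 3
  dst_img_width: 640         # network input size
  dst_img_height: 640
  dst_img_channel: 3
  batch_size: 1

predict_config:
  branch_num: 3              # new key: number of detection branches
  predict_dim: [1, 25200, 85]
  decode_type: 1             # new key: selects the decode path (feature-level vs. input-level)
  max_objects: 1024
  obj_threshold: 0.25
  nms_threshold: 0.45
```

`inference_config` (engine/onnx paths, `model_acc`, `offline_test`) and `common_config` (`log_path`, `imgs_path`) keep their existing keys.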
2 changes: 2 additions & 0 deletions modules/app_yolo/architecture/common/appconfig.h
@@ -69,7 +69,9 @@ class AppConfig {
REG_YAML_VAR(int, dst_img_w_);
REG_YAML_VAR(int, dst_img_h_);
REG_YAML_VAR(int, dst_img_c_);
REG_YAML_VAR(int, branch_num_);
REG_YAML_VAR(int, batchsizes_);
REG_YAML_VAR(int, decode_type_);
REG_YAML_VAR(int, max_objects_);
REG_YAML_VAR(int, model_acc_);
REG_YAML_VAR(float, obj_threshold_);
26 changes: 14 additions & 12 deletions modules/app_yolo/architecture/common/parseconfig.cpp
@@ -38,21 +38,23 @@ void ParseMsgs::ReadYamlParam() {
dst_img_h_ = app_config->get_dst_img_h_();
dst_img_c_ = app_config->get_dst_img_c_();
model_acc_ = app_config->get_model_acc_();
batchsizes_ = app_config->get_batchsizes_();
predict_dim_ = app_config->get_predict_dim_();
max_objects_ = app_config->get_max_objects_();
branch_num_ = app_config->get_branch_num_();
batchsizes_ = app_config->get_batchsizes_();
predict_dim_ = app_config->get_predict_dim_();
decode_type_ = app_config->get_decode_type_();
max_objects_ = app_config->get_max_objects_();
obj_threshold_ = app_config->get_obj_threshold_();
nms_threshold_ = app_config->get_nms_threshold_();
img_path_ = app_config->get_home_path_() + app_config->get_img_path_();
save_img_ = app_config->get_home_path_() + app_config->get_save_img_();
trt_path_ = app_config->get_home_path_() + app_config->get_trt_path_();
onnx_path_ = app_config->get_home_path_() + app_config->get_onnx_path_();
predict_path_ = app_config->get_home_path_() + app_config->get_predict_path_();
log_path_ = app_config->get_home_path_() + app_config->get_log_path_();
imgs_path_ = app_config->get_home_path_() + app_config->get_imgs_path_();
img_path_ = app_config->get_home_path_() + app_config->get_img_path_();
save_img_ = app_config->get_home_path_() + app_config->get_save_img_();
trt_path_ = app_config->get_home_path_() + app_config->get_trt_path_();
onnx_path_ = app_config->get_home_path_() + app_config->get_onnx_path_();
predict_path_ = app_config->get_home_path_() + app_config->get_predict_path_();
log_path_ = app_config->get_home_path_() + app_config->get_log_path_();
imgs_path_ = app_config->get_home_path_() + app_config->get_imgs_path_();

srcimg_size_ = src_img_w_ * src_img_h_ * src_img_c_;
dstimg_size_ = dst_img_w_ * dst_img_h_ * dst_img_c_;
srcimg_size_ = src_img_w_ * src_img_h_ * src_img_c_;
dstimg_size_ = dst_img_w_ * dst_img_h_ * dst_img_c_;
}

} // namespace common
2 changes: 2 additions & 0 deletions modules/app_yolo/architecture/common/parseconfig.h
@@ -69,7 +69,9 @@ class ParseMsgs {
int dst_img_c_; // Target image channel
int dstimg_size_; // Target image size
int model_acc_; // Model quantisation accuracy
int branch_num_; // Model branch number
int batchsizes_; // Batch size
int decode_type_; // Decode type
int max_objects_; // Maximum number of targets
float obj_threshold_; // Target Thresholds
float nms_threshold_; // Nms Target Thresholds
119 changes: 100 additions & 19 deletions modules/app_yolo/architecture/decodeprocessor.cpp
@@ -87,7 +88,8 @@ bool DecodeProcessor::DataResourceRelease() {}
/**
* @description: Inference
*/
bool DecodeProcessor::Inference(float* predict, InfertMsg& infer_msg, std::shared_ptr<InferMsgQue>& bboxQueue) {
bool DecodeProcessor::Inference(float* predict,
InfertMsg& infer_msg, std::shared_ptr<InferMsgQue>& bboxQueue) {
imgshape_["src"] = make_pair(infer_msg.height, infer_msg.width);

vector<Box> box_result;
@@ -108,7 +109,8 @@ bool DecodeProcessor::Inference(float* predict, InfertMsg& infer_msg, std::share
/**
* @description: Visualization
*/
void DecodeProcessor::Visualization(bool real_time, cv::Mat& img, int64_t timestamp, vector<Box>& results) {
void DecodeProcessor::Visualization(bool real_time,
cv::Mat& img, int64_t timestamp, vector<Box>& results) {
for (auto& box : results) {
cv::Scalar color;
tie(color[0], color[1], color[2]) = random_color(box.label);
@@ -134,29 +136,95 @@ void DecodeProcessor::Visualization(bool real_time, cv::Mat& img, int64_t timest
* @description: Bbox mapping to original map scale.
*/
void DecodeProcessor::ScaleBoxes(vector<Box>& box_result) {
float gain = min(imgshape_["dst"].first / static_cast<float>(imgshape_["src"].first), imgshape_["dst"].second / static_cast<float>(imgshape_["src"].second));
float pad[] = {(imgshape_["dst"].second - imgshape_["src"].second * gain) * 0.5, (imgshape_["dst"].first - imgshape_["src"].first * gain) * 0.5};
float gain = min(imgshape_["dst"].first / static_cast<float>(imgshape_["src"].first),\
imgshape_["dst"].second / static_cast<float>(imgshape_["src"].second));
float pad[] = {(imgshape_["dst"].second - imgshape_["src"].second * gain) * 0.5, \
(imgshape_["dst"].first - imgshape_["src"].first * gain) * 0.5};
for (int index = 0; index < box_result.size(); index++) {
box_result[index].left = clamp((box_result[index].left - pad[0]) / gain, 0.0f, static_cast<float>(imgshape_["src"].second));
box_result[index].right = clamp((box_result[index].right - pad[0]) / gain, 0.0f, static_cast<float>(imgshape_["src"].second));
box_result[index].top = clamp((box_result[index].top - pad[1]) / gain, 0.0f, static_cast<float>(imgshape_["src"].first));
box_result[index].bottom = clamp((box_result[index].bottom - pad[1]) / gain, 0.0f, static_cast<float>(imgshape_["src"].first));
box_result[index].left = clamp((box_result[index].left - pad[0]) / gain, 0.0f, \
static_cast<float>(imgshape_["src"].second));
box_result[index].right = clamp((box_result[index].right - pad[0]) / gain, 0.0f, \
static_cast<float>(imgshape_["src"].second));
box_result[index].top = clamp((box_result[index].top - pad[1]) / gain, 0.0f, \
static_cast<float>(imgshape_["src"].first));
box_result[index].bottom = clamp((box_result[index].bottom - pad[1]) / gain, 0.0f, \
static_cast<float>(imgshape_["src"].first));
}
}
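Written out, the letterbox back-mapping implemented by `ScaleBoxes` (where `imgshape_[...]` holds (height, width) pairs) is:

$$ g = \min\!\left(\frac{H_{dst}}{H_{src}},\ \frac{W_{dst}}{W_{src}}\right), \qquad p_x = \frac{W_{dst} - g\,W_{src}}{2}, \qquad p_y = \frac{H_{dst} - g\,H_{src}}{2}, $$

$$ x_{src} = \operatorname{clamp}\!\left(\frac{x_{dst} - p_x}{g},\, 0,\, W_{src}\right), \qquad y_{src} = \operatorname{clamp}\!\left(\frac{y_{dst} - p_y}{g},\, 0,\, H_{src}\right). $$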

/**
* @description: Cpu decode
* @description: Bounding box decoding at feature level
*/
void DecodeProcessor::CpuDecode(float* predict, InfertMsg& infer_msg, vector<Box>& box_result) {
void DecodeProcessor::BboxDecodeFeatureLevel(float* predict,
InfertMsg& infer_msg, vector<Box>& box_result)
{
// for (int j = 0; j < out_node_vec[1]; j++)
// {
// float* lables_node = cpu_output_buffers_[0] + j * 3;
// float* scores_node = cpu_output_buffers_[1] + j * 1;
// float* boxes_node = cpu_output_buffers_[2] + j * 4; // feature-map level

// int label = std::max_element(lables_node, lables_node + 3) - lables_node;
// float prob = lables_node[label];

// float objness = scores_node[0];
// if(objness < confidence_threshold)
// continue;

// float confidence = prob * objness;
// if(confidence < confidence_threshold)
// continue;

// if (j < 7680) {
// grid_x = anchor_points[0][j].first;
// grid_y = anchor_points[0][j].second;
// stride = 8;
// }
// else if (j >= 7680 && j < 9600) {
// grid_x = anchor_points[1][j-7680].first;
// grid_y = anchor_points[1][j-7680].second;
// stride = 16;
// }
// else if (j >= 9600 && j < 10080) {
// grid_x = anchor_points[2][j-9600].first;
// grid_y = anchor_points[2][j-9600].second;
// stride = 32;
// }

// // feature-map level -> input-image level
// float cx = (boxes_node[0] + grid_x) * stride; // input-image level
// float cy = (boxes_node[1] + grid_y) * stride;
// float width = exp(boxes_node[2]) * stride;
// float height = exp(boxes_node[3]) * stride; // anchor free
// float left = cx - width * 0.5; // input-image level
// float top = cy - height * 0.5;
// float right = cx + width * 0.5;
// float bottom = cy + height * 0.5;

// // input-image level -> original-image level
// float image_base_left = d2i[0] * left + d2i[2];
// float image_base_right = d2i[0] * right + d2i[2];
// float image_base_top = d2i[0] * top + d2i[5];
// float image_base_bottom = d2i[0] * bottom + d2i[5];
// bboxes.push_back({image_base_left, image_base_top, image_base_right, image_base_bottom, (float)label, confidence});
// }
}

/**
* @description: Bounding box decoding at input level.
*/
void DecodeProcessor::BboxDecodeInputLevel(float* predict,
InfertMsg& infer_msg, vector<Box>& box_result) {
vector<Box> boxes;
int num_classes = parsemsgs_->predict_dim_[2] - 5;
for (int i = 0; i < parsemsgs_->predict_dim_[1]; ++i) {
float* pitem = predict + i * parsemsgs_->predict_dim_[2];
for (int i = 0; i < parsemsgs_->predict_dim_[1]; ++i)
{
float* pitem = predict + i * parsemsgs_->predict_dim_[2];
float objness = pitem[4];
if (objness < parsemsgs_->obj_threshold_) continue;
float* pclass = pitem + 5;

int label = std::max_element(pclass, pclass + num_classes) - pclass;
int label = std::max_element(pclass, pclass + num_classes) - pclass;
float prob = pclass[label];
float confidence = prob * objness;
if (confidence < parsemsgs_->obj_threshold_) continue;
@@ -165,22 +233,35 @@ void DecodeProcessor::CpuDecode(float* predict, InfertMsg& infer_msg, vector<Box
float cy = pitem[1];
float width = pitem[2];
float height = pitem[3];
float left = cx - width * 0.5;
float left = cx - width * 0.5;
float top = cy - height * 0.5;
float right = cx + width * 0.5;
float right = cx + width * 0.5;
float bottom = cy + height * 0.5;

// Input-image-level predicted boxes ==> mapped back to the original image scale
float image_left = infer_msg.affineMatrix_inv(0, 0) * left + infer_msg.affineMatrix_inv(0, 2);
float image_top = infer_msg.affineMatrix_inv(1, 1) * top + infer_msg.affineMatrix_inv(1, 2);
float image_right = infer_msg.affineMatrix_inv(0, 0) * right + infer_msg.affineMatrix_inv(0, 2);
float image_left = infer_msg.affineMatrix_inv(0, 0) * left + infer_msg.affineMatrix_inv(0, 2);
float image_top = infer_msg.affineMatrix_inv(1, 1) * top + infer_msg.affineMatrix_inv(1, 2);
float image_right = infer_msg.affineMatrix_inv(0, 0) * right + infer_msg.affineMatrix_inv(0, 2);
float image_bottom = infer_msg.affineMatrix_inv(1, 1) * bottom + infer_msg.affineMatrix_inv(1, 2);

boxes.emplace_back(image_left, image_top, image_right, image_bottom, confidence, label);
}

nms_plugin_->Nms(boxes, box_result, parsemsgs_->nms_threshold_);
}
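`BboxDecodeInputLevel` maps boxes from network-input coordinates back to the original image with the stored inverse letterbox affine matrix; writing $M^{-1}$ for `infer_msg.affineMatrix_inv`, the code above applies

$$ x_{img} = M^{-1}_{0,0}\,x + M^{-1}_{0,2}, \qquad y_{img} = M^{-1}_{1,1}\,y + M^{-1}_{1,2} $$

to the left/right and top/bottom coordinates respectively, before handing the boxes to `nms_plugin_->Nms(...)`.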

/**
* @description: Cpu decode.
*/
void DecodeProcessor::CpuDecode(float* predict,
InfertMsg& infer_msg, vector<Box>& box_result) {
if((DecodeType)parsemsgs_->decode_type_ == DecodeType::FEATURE_LEVEL) {
BboxDecodeFeatureLevel(predict, infer_msg, box_result);
} else if ((DecodeType)parsemsgs_->decode_type_ == DecodeType::INPUT_LEVEL) {
BboxDecodeInputLevel(predict, infer_msg, box_result);
} else {
GLOG_ERROR("[CpuDecode]: Decoding method error. ");
}
}

} // namespace appinfer
} // namespace hpc
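The `DecodeType` enum used in `CpuDecode` is defined elsewhere in the repository and is not part of this diff. A minimal sketch of what such a type and the dispatch look like — the enumerator names match the code above, but the underlying integer values and the header location are assumptions:

```cpp
// Hypothetical sketch (not the repository's actual header): decode strategy
// selected through the YAML key predict_config.decode_type.
enum class DecodeType : int {
  FEATURE_LEVEL = 0,  // decode raw per-branch feature-map outputs (assumed value)
  INPUT_LEVEL   = 1   // decode boxes already in network-input coordinates (assumed value)
};

// Usage mirroring CpuDecode's dispatch:
//   if (static_cast<DecodeType>(parsemsgs_->decode_type_) == DecodeType::FEATURE_LEVEL) { ... }
```

With `decode_type` read from the YAML config, switching decoding strategies becomes a configuration change rather than a code change.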
16 changes: 16 additions & 0 deletions modules/app_yolo/architecture/decodeprocessor.h
@@ -138,6 +138,22 @@ class DecodeProcessor : public InferModuleBase {
*/
void ScaleBoxes(vector<Box>& box_result);

/**
* @brief Box decode feature level.
* @param[in] [float*, InfertMsg&, vector<Box>&].
* @return void.
*/
void BboxDecodeFeatureLevel(float* predict,
InfertMsg& infer_msg, vector<Box>& box_result);

/**
* @brief Box decode input level.
* @param[in] [float*, InfertMsg&, vector<Box>&].
* @return void.
*/
void BboxDecodeInputLevel(float* predict,
InfertMsg& infer_msg, vector<Box>& box_result);

/**
* @brief Cpu decode.
* @param[in] [float*, vector<Box>&].
33 changes: 20 additions & 13 deletions modules/app_yolo/architecture/preprocessor.cpp
@@ -84,7 +84,9 @@ bool PreProcessor::DataResourceRelease() {}
/**
* @description: Inference.
*/
bool PreProcessor::Inference(InfertMsg& input_msg, float* dstimg, DeviceMode inferMode, cudaStream_t stream) {
bool PreProcessor::Inference(InfertMsg& input_msg,
float* dstimg, DeviceMode inferMode, cudaStream_t stream)
{
CalAffineMatrix(input_msg);

switch (inferMode) {
@@ -108,14 +110,17 @@ bool PreProcessor::Inference(InfertMsg& input_msg, float* dstimg, DeviceMode inf
/**
* @description: Gpu preprocessor.
*/
bool PreProcessor::GpuPreprocessor(InfertMsg& input_msg, float* dstimg, cudaStream_t stream) {
checkRuntime(cudaMemcpy(input_data_device_, input_msg.image.data, input_msg.img_size * sizeof(uint8_t), cudaMemcpyHostToDevice));
bool PreProcessor::GpuPreprocessor(InfertMsg& input_msg, float* dstimg, cudaStream_t stream)
{
checkRuntime(cudaMemcpy(input_data_device_, input_msg.image.data,\
input_msg.img_size * sizeof(uint8_t), cudaMemcpyHostToDevice));

if (std::string(MODEL_FLAG) == "yolov5") {
warp_affine_bilinear(input_data_device_, parsemsgs_->batchsizes_, input_msg, dstimg, parsemsgs_->dst_img_w_, parsemsgs_->dst_img_h_, 114, nullptr, AppYolo::YOLOV5_MODE);
warp_affine_bilinear(input_data_device_, parsemsgs_->batchsizes_, input_msg, dstimg, \
parsemsgs_->dst_img_w_, parsemsgs_->dst_img_h_, 114, nullptr, AppYolo::YOLOV5_MODE);
} else if (std::string(MODEL_FLAG) == "yolox") {
warp_affine_bilinear(input_data_device_, parsemsgs_->batchsizes_, input_msg, dstimg, parsemsgs_->dst_img_w_, parsemsgs_->dst_img_h_, 114, nullptr, AppYolo::YOLOX_MODE);
} else {
warp_affine_bilinear(input_data_device_, parsemsgs_->batchsizes_, input_msg, dstimg, \
parsemsgs_->dst_img_w_, parsemsgs_->dst_img_h_, 114, nullptr, AppYolo::YOLOX_MODE);
}

return true;
@@ -124,13 +129,15 @@ bool PreProcessor::GpuPreprocessor(InfertMsg& input_msg, float* dstimg, cudaStre
/**
* @description: Cpu preprocessor.
*/
bool PreProcessor::CpuPreprocessor(cv::Mat& srcimg, uint64_t timestamp, float* input_device_gpu, cudaStream_t stream) {
bool PreProcessor::CpuPreprocessor(cv::Mat& srcimg, uint64_t timestamp,
float* input_device_gpu, cudaStream_t stream)
{
checkRuntime(cudaMallocHost(&input_data_host_, sizeof(float) * parsemsgs_->dstimg_size_));

float scale_x = parsemsgs_->dst_img_w_ / static_cast<float>(parsemsgs_->src_img_w_);
float scale_y = parsemsgs_->dst_img_h_ / static_cast<float>(parsemsgs_->src_img_h_);
float scale = std::min(scale_x, scale_y);
float i2d[6], d2i[6];
float scale = std::min(scale_x, scale_y);
float i2d[6];
// Resize the image, aligning the geometric centers of the source and destination images
i2d[0] = scale;
i2d[1] = 0;
@@ -140,12 +147,11 @@ bool PreProcessor::CpuPreprocessor(cv::Mat& srcimg, uint64_t timestamp, float* i
i2d[5] = (-scale * parsemsgs_->src_img_h_ + parsemsgs_->dst_img_h_ + scale - 1) * 0.5;

cv::Mat m2x3_i2d(2, 3, CV_32F, i2d); // image to dst(network), 2x3 matrix
cv::Mat m2x3_d2i(2, 3, CV_32F, d2i); // dst to image, 2x3 matrix
cv::invertAffineTransform(m2x3_i2d, m2x3_d2i); // compute the inverse affine transform

cv::Mat input_image(parsemsgs_->dst_img_h_, parsemsgs_->dst_img_w_, CV_8UC3);
// Apply a translation/scale/rotation transform to the image (invertible)
cv::warpAffine(srcimg, input_image, m2x3_i2d, input_image.size(), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));
cv::warpAffine(srcimg, input_image, m2x3_i2d, input_image.size(), \
cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));
std::string path = parsemsgs_->save_img_ + "/img_cpu_test_" + std::to_string(timestamp) + ".jpg";
cv::imwrite(path, input_image);

@@ -161,7 +167,8 @@ bool PreProcessor::CpuPreprocessor(cv::Mat& srcimg, uint64_t timestamp, float* i
*phost_b++ = pimage[2] / 255.0f;
}

checkRuntime(cudaMemcpyAsync(input_device_gpu, input_data_host_, sizeof(float) * parsemsgs_->dstimg_size_, cudaMemcpyHostToDevice, stream));
checkRuntime(cudaMemcpyAsync(input_device_gpu, input_data_host_, \
sizeof(float) * parsemsgs_->dstimg_size_, cudaMemcpyHostToDevice, stream));

return true;
}
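The `i2d` values filled in `CpuPreprocessor` form the usual letterbox affine transform. The x-row entries (`i2d[2..4]`) are collapsed in this hunk, so the x translation below is inferred by symmetry with the shown `i2d[5]`: with $s = \min(W_{dst}/W_{src},\ H_{dst}/H_{src})$ (the `scale` above),

$$ \begin{pmatrix} x' \\ y' \end{pmatrix} = \begin{pmatrix} s & 0 & \tfrac{-s\,W_{src} + W_{dst} + s - 1}{2} \\ 0 & s & \tfrac{-s\,H_{src} + H_{dst} + s - 1}{2} \end{pmatrix} \begin{pmatrix} x \\ y \\ 1 \end{pmatrix}, $$

i.e. a uniform scale by $s$ plus a translation that centres the resized image on the destination canvas, with the constant 114 used as the padding value in `cv::warpAffine`.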