From 2d9af8e00108e90767a00502c07fe264f080d081 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 11 Aug 2024 07:20:19 +0300 Subject: [PATCH] YOLOv8-obb works with TensorRT backend --- README.md | 7 +++ data/DOTA.names | 30 ++++++------- data/settings_yolov8_obb.ini | 8 ++-- example/examples.h | 2 +- src/Detector/YoloTensorRTDetector.cpp | 11 +++-- src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp | 5 +-- src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp | 45 ++++++++++++------- src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp | 10 +++-- 8 files changed, 72 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 8707027b..27b5fba2 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ # Last changes +* YOLOv8-obb detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example + * YOLOv10 detector worked with TensorRT! Export pretrained Pytorch models [here (THU-MIG/yolov10)](https://github.com/THU-MIG/yolov10) to onnx format and run Multitarget-tracker with -e=6 example * YOLOv9 detector worked with TensorRT! Export pretrained Pytorch models [here (WongKinYiu/yolov9)](https://github.com/WongKinYiu/yolov9) to onnx format and run Multitarget-tracker with -e=6 example @@ -13,6 +15,11 @@ # New videos! +* YOLOv8-obb detection with rotated boxes (DOTA v1.0 trained) + +[![YOLOv8-obb detection:](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs) + + * YOLOv7 instance segmentation [![YOLOv7 instance segmentation:](https://img.youtube.com/vi/gZxuYyFz1dU/0.jpg)](https://youtu.be/gZxuYyFz1dU) diff --git a/data/DOTA.names b/data/DOTA.names index af6540f0..adea7619 100644 --- a/data/DOTA.names +++ b/data/DOTA.names @@ -1,15 +1,15 @@ -name_1 -name_2 -name_3 -name_4 -name_5 -name_6 -name_7 -name_8 -name_9 -name_10 -name_11 -name_12 -name_13 -name_14 -name_15 +plane +ship +storage_tank +baseball_diamond +tennis_court +basketball_court +ground_track_field +harbor +bridge +large_vehicle +small_vehicle +helicopter +roundabout +soccer_ball_field +swimming_pool \ No newline at end of file diff --git a/data/settings_yolov8_obb.ini b/data/settings_yolov8_obb.ini index 2cc18d8a..ea95b634 100644 --- a/data/settings_yolov8_obb.ini +++ b/data/settings_yolov8_obb.ini @@ -27,14 +27,14 @@ ocv_dnn_target = DNN_TARGET_CPU ocv_dnn_backend = DNN_BACKEND_OPENCV #----------------------------- -nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/yolov8s-obb.onnx -nn_config = C:/work/home/mtracker/Multitarget-tracker/data/yolov8s-obb.onnx +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/yolov8x-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/yolov8x-obb.onnx class_names = C:/work/home/mtracker/Multitarget-tracker/data/DOTA.names #----------------------------- -confidence_threshold = 0.5 +confidence_threshold = 0.6 -max_crop_ratio = 0 +max_crop_ratio = 1 max_batch = 1 gpu_id = 0 diff --git a/example/examples.h b/example/examples.h index 64bd2571..1be76399 100644 --- a/example/examples.h +++ b/example/examples.h @@ -923,7 +923,7 @@ class YoloTensorRTExample final : public VideoExample } } - m_detector->CalcMotionMap(frame); + //m_detector->CalcMotionMap(frame); } }; diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp index 094bedfa..a0ebeb44 100644 --- a/src/Detector/YoloTensorRTDetector.cpp +++ b/src/Detector/YoloTensorRTDetector.cpp @@ -235,7 +235,12 @@ void YoloTensorRTDetector::Detect(const cv::UMat& colorFrame) for (const tensor_rt::Result& bbox : detects[j]) { if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(bbox.m_brect.x + crop.x, bbox.m_brect.y + crop.y, bbox.m_brect.width, bbox.m_brect.height), T2T(bbox.m_id), bbox.m_prob); + { + cv::RotatedRect newRRect(bbox.m_rrect); + newRRect.center.x += crop.x; + newRRect.center.y += crop.y; + tmpRegions.emplace_back(newRRect, T2T(bbox.m_id), bbox.m_prob); + } } } } @@ -279,8 +284,8 @@ void YoloTensorRTDetector::Detect(const std::vector& frames, std::vect const tensor_rt::BatchResult& dets = detects[i]; for (const tensor_rt::Result& bbox : dets) { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) - regions[i].emplace_back(bbox.m_brect, T2T(bbox.m_id), bbox.m_prob); + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) + regions[i].emplace_back(bbox.m_rrect, T2T(bbox.m_id), bbox.m_prob); } } m_regions.assign(std::begin(regions.back()), std::end(regions.back())); diff --git a/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp index 39e44759..9a21e397 100644 --- a/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp +++ b/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp @@ -17,9 +17,8 @@ class YOLOv8_bb_onnx : public YoloONNX { std::vector resBoxes; - //0: name: images, size: 1x3x1024x1024 - //1: name: output0, size: 1x20x21504 - //20: 15 DOTA classes + x + y + w + h + a + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); diff --git a/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp index 370d905a..4c39c5a4 100644 --- a/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp +++ b/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp @@ -17,8 +17,10 @@ class YOLOv8_obb_onnx : public YoloONNX { std::vector resBoxes; - //0: name: images, size: 1x3x640x640 - //1: name: output0, size: 1x84x8400 + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); @@ -27,8 +29,8 @@ class YOLOv8_obb_onnx : public YoloONNX size_t ncInd = 1; size_t lenInd = 2; - int nc = m_outpuDims[0].d[ncInd] - 4; - int dimensions = nc + 4; + int nc = m_outpuDims[0].d[ncInd] - shapeDataSize; + int dimensions = nc + shapeDataSize; size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; //auto Volume = [](const nvinfer1::Dims& d) //{ @@ -57,7 +59,7 @@ class YOLOv8_obb_onnx : public YoloONNX std::vector classIds; std::vector confidences; - std::vector rectBoxes; + std::vector rectBoxes; classIds.reserve(len); confidences.reserve(len); rectBoxes.reserve(len); @@ -65,7 +67,7 @@ class YOLOv8_obb_onnx : public YoloONNX for (size_t i = 0; i < len; ++i) { // Box - size_t k = i * (nc + 4); + size_t k = i * (nc + shapeDataSize); int classId = -1; float objectConf = 0.f; @@ -80,30 +82,41 @@ class YOLOv8_obb_onnx : public YoloONNX } //if (i == 0) - // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} if (objectConf >= m_params.confThreshold) { classIds.push_back(classId); confidences.push_back(objectConf); - // (center x, center y, width, height) to (x, y, w, h) - float x = fw * (output[k] - output[k + 2] / 2); - float y = fh * (output[k + 1] - output[k + 3] / 2); + // (center x, center y, width, height) + float cx = fw * output[k]; + float cy = fh * output[k + 1]; float width = fw * output[k + 2]; float height = fh * output[k + 3]; - rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI; + rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; } } // Non-maximum suppression to eliminate redudant overlapping boxes - std::vector indices; - cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); - resBoxes.reserve(indices.size()); + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); - for (size_t bi = 0; bi < indices.size(); ++bi) + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) { - resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); } return resBoxes; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp index 6c821351..f4c99ebd 100644 --- a/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp +++ b/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp @@ -19,6 +19,8 @@ class YOLOv9_bb_onnx : public YoloONNX //0: name: images, size: 1x3x640x640 //1: name: output0, size: 1x84x8400 + //84: 80 COCO classes + x + y + w + h + constexpr int shapeDataSize = 4; const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); @@ -27,8 +29,8 @@ class YOLOv9_bb_onnx : public YoloONNX size_t ncInd = 1; size_t lenInd = 2; - int nc = m_outpuDims[0].d[ncInd] - 4; - int dimensions = nc + 4; + int nc = m_outpuDims[0].d[ncInd] - shapeDataSize; + int dimensions = nc + shapeDataSize; size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; //auto Volume = [](const nvinfer1::Dims& d) //{ @@ -65,13 +67,13 @@ class YOLOv9_bb_onnx : public YoloONNX for (size_t i = 0; i < len; ++i) { // Box - size_t k = i * (nc + 4); + size_t k = i * (nc + shapeDataSize); int classId = -1; float objectConf = 0.f; for (int j = 0; j < nc; ++j) { - const float classConf = output[k + 4 + j]; + const float classConf = output[k + shapeDataSize + j]; if (classConf > objectConf) { classId = j;