From 2d9af8e00108e90767a00502c07fe264f080d081 Mon Sep 17 00:00:00 2001
From: Nuzhny007 <nuzhny@mail.ru>
Date: Sun, 11 Aug 2024 07:20:19 +0300
Subject: [PATCH] YOLOv8-obb works with TensorRT backend

---
 README.md                                     |  7 +++
 data/DOTA.names                               | 30 ++++++-------
 data/settings_yolov8_obb.ini                  |  8 ++--
 example/examples.h                            |  2 +-
 src/Detector/YoloTensorRTDetector.cpp         | 11 +++--
 src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp  |  5 +--
 src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp | 45 ++++++++++++-------
 src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp  | 10 +++--
 8 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 8707027b..27b5fba2 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@
 
 # Last changes
 
+* YOLOv8-obb detector now works with TensorRT! Export pretrained PyTorch models from [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to ONNX format and run Multitarget-tracker with the -e=6 example
+
 * YOLOv10 detector worked with TensorRT! Export pretrained Pytorch models [here (THU-MIG/yolov10)](https://github.com/THU-MIG/yolov10) to onnx format and run Multitarget-tracker with -e=6 example
 
 * YOLOv9 detector worked with TensorRT! Export pretrained Pytorch models [here (WongKinYiu/yolov9)](https://github.com/WongKinYiu/yolov9) to onnx format and run Multitarget-tracker with -e=6 example
@@ -13,6 +15,11 @@
 
 # New videos!
 
+* YOLOv8-obb detection with rotated boxes (DOTA v1.0 trained)
+
+[![YOLOv8-obb detection:](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs)
+
+
 * YOLOv7 instance segmentation
 
 [![YOLOv7 instance segmentation:](https://img.youtube.com/vi/gZxuYyFz1dU/0.jpg)](https://youtu.be/gZxuYyFz1dU)
diff --git a/data/DOTA.names b/data/DOTA.names
index af6540f0..adea7619 100644
--- a/data/DOTA.names
+++ b/data/DOTA.names
@@ -1,15 +1,15 @@
-name_1
-name_2
-name_3
-name_4
-name_5
-name_6
-name_7
-name_8
-name_9
-name_10
-name_11
-name_12
-name_13
-name_14
-name_15
+plane
+ship
+storage_tank
+baseball_diamond
+tennis_court
+basketball_court
+ground_track_field
+harbor
+bridge
+large_vehicle
+small_vehicle
+helicopter
+roundabout
+soccer_ball_field
+swimming_pool
\ No newline at end of file
diff --git a/data/settings_yolov8_obb.ini b/data/settings_yolov8_obb.ini
index 2cc18d8a..ea95b634 100644
--- a/data/settings_yolov8_obb.ini
+++ b/data/settings_yolov8_obb.ini
@@ -27,14 +27,14 @@ ocv_dnn_target = DNN_TARGET_CPU
 ocv_dnn_backend = DNN_BACKEND_OPENCV
 
 #-----------------------------
-nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/yolov8s-obb.onnx
-nn_config = C:/work/home/mtracker/Multitarget-tracker/data/yolov8s-obb.onnx
+nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/yolov8x-obb.onnx
+nn_config = C:/work/home/mtracker/Multitarget-tracker/data/yolov8x-obb.onnx
 class_names = C:/work/home/mtracker/Multitarget-tracker/data/DOTA.names
 
 #-----------------------------
-confidence_threshold = 0.5
+confidence_threshold = 0.6
 	
-max_crop_ratio = 0
+max_crop_ratio = 1
 max_batch = 1
 gpu_id = 0
 
diff --git a/example/examples.h b/example/examples.h
index 64bd2571..1be76399 100644
--- a/example/examples.h
+++ b/example/examples.h
@@ -923,7 +923,7 @@ class YoloTensorRTExample final : public VideoExample
 			}
 		}
 
-		m_detector->CalcMotionMap(frame);
+		//m_detector->CalcMotionMap(frame);
 	}
 };
 
diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp
index 094bedfa..a0ebeb44 100644
--- a/src/Detector/YoloTensorRTDetector.cpp
+++ b/src/Detector/YoloTensorRTDetector.cpp
@@ -235,7 +235,12 @@ void YoloTensorRTDetector::Detect(const cv::UMat& colorFrame)
 				for (const tensor_rt::Result& bbox : detects[j])
 				{
 					if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList))
-						tmpRegions.emplace_back(cv::Rect(bbox.m_brect.x + crop.x, bbox.m_brect.y + crop.y, bbox.m_brect.width, bbox.m_brect.height), T2T(bbox.m_id), bbox.m_prob);
+					{
+						cv::RotatedRect newRRect(bbox.m_rrect);
+						newRRect.center.x += crop.x;
+						newRRect.center.y += crop.y;
+						tmpRegions.emplace_back(newRRect, T2T(bbox.m_id), bbox.m_prob);
+					}
 				}
 			}
         }
@@ -279,8 +284,8 @@ void YoloTensorRTDetector::Detect(const std::vector<cv::UMat>& frames, std::vect
             const tensor_rt::BatchResult& dets = detects[i];
             for (const tensor_rt::Result& bbox : dets)
             {
-                if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList))
-                    regions[i].emplace_back(bbox.m_brect, T2T(bbox.m_id), bbox.m_prob);
+				if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList))
+					regions[i].emplace_back(bbox.m_rrect, T2T(bbox.m_id), bbox.m_prob);
             }
         }
         m_regions.assign(std::begin(regions.back()), std::end(regions.back()));
diff --git a/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp
index 39e44759..9a21e397 100644
--- a/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp
+++ b/src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp
@@ -17,9 +17,8 @@ class YOLOv8_bb_onnx : public YoloONNX
 	{
 		std::vector<tensor_rt::Result> resBoxes;
 
-		//0: name: images, size: 1x3x1024x1024
-		//1: name: output0, size: 1x20x21504
-		//20: 15 DOTA classes + x + y + w + h + a
+		//0: name: images, size: 1x3x640x640
+		//1: name: output0, size: 1x84x8400
 
 		const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
 		const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
diff --git a/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp
index 370d905a..4c39c5a4 100644
--- a/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp
+++ b/src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp
@@ -17,8 +17,10 @@ class YOLOv8_obb_onnx : public YoloONNX
 	{
 		std::vector<tensor_rt::Result> resBoxes;
 
-		//0: name: images, size: 1x3x640x640
-		//1: name: output0, size: 1x84x8400
+		//0: name: images, size: 1x3x1024x1024
+		//1: name: output0, size: 1x20x21504
+		//20: 15 DOTA classes + x + y + w + h + a
+		constexpr int shapeDataSize = 5;
 
 		const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
 		const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
@@ -27,8 +29,8 @@ class YOLOv8_obb_onnx : public YoloONNX
 
 		size_t ncInd = 1;
 		size_t lenInd = 2;
-		int nc = m_outpuDims[0].d[ncInd] - 4;
-		int dimensions = nc + 4;
+		int nc = m_outpuDims[0].d[ncInd] - shapeDataSize;
+		int dimensions = nc + shapeDataSize;
 		size_t len = static_cast<size_t>(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize;
 		//auto Volume = [](const nvinfer1::Dims& d)
 		//{
@@ -57,7 +59,7 @@ class YOLOv8_obb_onnx : public YoloONNX
 
 		std::vector<int> classIds;
 		std::vector<float> confidences;
-		std::vector<cv::Rect> rectBoxes;
+		std::vector<cv::RotatedRect> rectBoxes;
 		classIds.reserve(len);
 		confidences.reserve(len);
 		rectBoxes.reserve(len);
@@ -65,7 +67,7 @@ class YOLOv8_obb_onnx : public YoloONNX
 		for (size_t i = 0; i < len; ++i)
 		{
 			// Box
-			size_t k = i * (nc + 4);
+			size_t k = i * (nc + shapeDataSize);
 
 			int classId = -1;
 			float objectConf = 0.f;
@@ -80,30 +82,41 @@ class YOLOv8_obb_onnx : public YoloONNX
 			}
 
 			//if (i == 0)
-			//	std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl;
+			//{
+			//	for (int jj = 0; jj < 20; ++jj)
+			//	{
+			//		std::cout << output[jj] << " ";
+			//	}
+			//	std::cout << std::endl;
+			//}
 
 			if (objectConf >= m_params.confThreshold)
 			{
 				classIds.push_back(classId);
 				confidences.push_back(objectConf);
 
-				// (center x, center y, width, height) to (x, y, w, h)
-				float x = fw * (output[k] - output[k + 2] / 2);
-				float y = fh * (output[k + 1] - output[k + 3] / 2);
+				// (center x, center y, width, height)
+				float cx = fw * output[k];
+				float cy = fh * output[k + 1];
 				float width = fw * output[k + 2];
 				float height = fh * output[k + 3];
-				rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
+				float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI;
+				rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle);
+
+				//if (rectBoxes.size() == 1)
+				//	std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl;
 			}
 		}
 
 		// Non-maximum suppression to eliminate redudant overlapping boxes
-		std::vector<int> indices;
-		cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices);
-		resBoxes.reserve(indices.size());
+		//std::vector<int> indices;
+		//cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices);
+		//resBoxes.reserve(indices.size());
 
-		for (size_t bi = 0; bi < indices.size(); ++bi)
+		resBoxes.reserve(rectBoxes.size());
+		for (size_t bi = 0; bi < rectBoxes.size(); ++bi)
 		{
-			resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]);
+			resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]);
 		}
 
 		return resBoxes;
diff --git a/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp
index 6c821351..f4c99ebd 100644
--- a/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp
+++ b/src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp
@@ -19,6 +19,8 @@ class YOLOv9_bb_onnx : public YoloONNX
 
 		//0: name: images, size: 1x3x640x640
 		//1: name: output0, size: 1x84x8400
+		//84: 80 COCO classes + x + y + w + h
+		constexpr int shapeDataSize = 4;
 
 		const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
 		const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
@@ -27,8 +29,8 @@ class YOLOv9_bb_onnx : public YoloONNX
 
 		size_t ncInd = 1;
 		size_t lenInd = 2;
-		int nc = m_outpuDims[0].d[ncInd] - 4;
-		int dimensions = nc + 4;
+		int nc = m_outpuDims[0].d[ncInd] - shapeDataSize;
+		int dimensions = nc + shapeDataSize;
 		size_t len = static_cast<size_t>(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize;
 		//auto Volume = [](const nvinfer1::Dims& d)
 		//{
@@ -65,13 +67,13 @@ class YOLOv9_bb_onnx : public YoloONNX
 		for (size_t i = 0; i < len; ++i)
 		{
 			// Box
-			size_t k = i * (nc + 4);
+			size_t k = i * (nc + shapeDataSize);
 
 			int classId = -1;
 			float objectConf = 0.f;
 			for (int j = 0; j < nc; ++j)
 			{
-				const float classConf = output[k + 4 + j];
+				const float classConf = output[k + shapeDataSize + j];
 				if (classConf > objectConf)
 				{
 					classId = j;