From 8026eaa14f1bf3b999e1cd77015a2211c88d8c3f Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Fri, 14 Aug 2020 13:11:33 +0530 Subject: [PATCH 1/9] Add YOLO preprocessor --- dataloader/preprocessor.hpp | 131 ++++++++++++++++++++++++++++++++++- tests/CMakeLists.txt | 1 + tests/preprocessor_tests.cpp | 62 +++++++++++++++++ 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 tests/preprocessor_tests.cpp diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index 85887a0b..724ad96f 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -1,5 +1,5 @@ /** - * @file dataloader.hpp + * @file preprocessor.hpp * @author Kartik Dutt * * Definition of PreProcessor class for popular datasets. @@ -93,6 +93,135 @@ class PreProcessor trainFeatures(i) = ((uint8_t)(trainFeatures(i)) / 255.0); } } + + /** + * PreProcessor for YOLO model. Converts arma::field type annotations to + * arma::mat type for training YOLO model. Each column in target matrix has + * the size : gridWidth * gridHeight * (5 * numBoxes + classes). + * + * @param annotations Field object created using model's dataloader containing + * annotation for images. + * @param output Output matrix where output will be stored. + * @param imageWidth Width of image used for training YOLO model. + * @param imageHeight Height of image used for training YOLO model. + * @param gridWidth Width of output feature map of YOLO model. + * @param gridHeight Height of output feature map of YOLO model. + * @param numBoxes Number of bounding boxes per grid. + * @param numClasses Number of classes in training set. + * + * Note : This function must be called manually before model is used. + */ + template + static void YOLOPreProcessor(const DatasetY& annotations, + arma::Mat& output, + const size_t version = 1, + const size_t imageWidth = 224, + const size_t imageHeight = 224, + const size_t gridWidth = 7, + const size_t gridHeight = 7, + const size_t numBoxes = 2, + const size_t numClasses = 20) + { + // See if we can change this to v4 / v5. + mlpack::Log::Assert(version >= 1 && version <= 3, "Supported YOLO versions \ + are version 1 to version 3."); + + mlpack::Log::Assert(typeid(annotations) == typeid(arma::field), + "Use Field type to represent annotations."); + + size_t batchSize = annotations.n_cols; + size_t numPredictions = 5 * numBoxes + numClasses; + if (version > 1) + { + // Each bounding boxes has a corresponding class. + numPredictions = numBoxes * (5 + numClasses); + } + + double cellSizeHeight = (double) 1.0 / gridHeight; + double cellSizeWidth = (double) 1.0 / gridWidth; + + // Set size of output and use cubes convenience. + output.set_size(gridWidth * gridHeight * numPredictions, batchSize); + output.zeros(); + + // Use offset to create a cube for a particular column / batch. + size_t offset = 0; + for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++) + { + arma::cube outputTemp(const_cast &>(output).memptr() + + offset, gridHeight, gridWidth, numPredictions, false, false); + offset += gridWidth * gridHeight * numPredictions; + + // Get the bounding box and labels corresponding to current image. + arma::mat labels(1, annotations(0, boxIdx).n_elem / 5); + arma::mat boundingBoxes(4, annotations(0, boxIdx).n_elem / 5); + for (size_t i = 0; i < boundingBoxes.n_cols; i++) + { + labels.col(i)(0) = annotations(0, boxIdx)(i * 5); + boundingBoxes.col(i) = annotations(0, boxIdx)(arma::span(i * 5 + 1, + (i + 1) * 5 - 1)); + } + + // Normalize the coordinates. + boundingBoxes.row(0) /= imageWidth; + boundingBoxes.row(2) /= imageWidth; + boundingBoxes.row(1) /= imageHeight; + boundingBoxes.row(3) /= imageHeight; + + // Get width and height as well as centres for the bounding box. + arma::mat widthAndHeight(2, boundingBoxes.n_cols); + widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0)); + widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1)); + + arma::mat centres(2, boundingBoxes.n_cols); + centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0; + centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0; + + // Assign bounding boxes to the grid. + for (size_t i = 0; i < boundingBoxes.n_cols; i++) + { + // Index for representing bounding box on grid. + arma::vec gridCoordinates = centres.col(i); + gridCoordinates(0) = std::ceil(gridCoordinates(0) / cellSizeWidth) - 1; + gridCoordinates(1) = std::ceil(gridCoordinates(1) / cellSizeHeight) - 1; + + size_t gridX = gridCoordinates(0); + size_t gridY = gridCoordinates(1); + gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth; + gridCoordinates(1) = gridCoordinates(1) * cellSizeHeight; + + // Normalize to 1.0. + gridCoordinates = centres.col(i) - gridCoordinates; + gridCoordinates(0) /= cellSizeWidth; + gridCoordinates(1) /= cellSizeHeight; + + if (version == 1) + { + // Fill elements in the grid. + for (size_t k = 0; k < numBoxes; k++) + { + size_t s = 5 * k; + outputTemp(arma::span(gridX), arma::span(gridY), + arma::span(s, s + 1)) = gridCoordinates; + outputTemp(arma::span(gridX), arma::span(gridY), + arma::span(s + 2, s + 3)) = widthAndHeight.col(i); + outputTemp(gridX, gridY, s + 4) = 1.0; + } + outputTemp(gridX, gridY, 5 * numBoxes + labels.col(i)(0)) = 1; + } + else + { + size_t bBoxOffset = (5 + numClasses) * i; + outputTemp(arma::span(gridX), arma::span(gridY), + arma::span(bBoxOffset, bBoxOffset + 1)) = gridCoordinates; + outputTemp(arma::span(gridX), arma::span(gridY), + arma::span(bBoxOffset + 2, bBoxOffset + 3)) = widthAndHeight.col(i); + outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0; + outputTemp(gridX, gridY, bBoxOffset + 5 + labels.col(i)(0)) = 1; + } + } + } + } }; #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dacc08f8..bdf51dc5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,6 +10,7 @@ add_executable(models_test augmentation_tests.cpp ffn_model_tests.cpp dataloader_tests.cpp + preprocessor_tests.cpp utils_tests.cpp ) diff --git a/tests/preprocessor_tests.cpp b/tests/preprocessor_tests.cpp new file mode 100644 index 00000000..7fbab984 --- /dev/null +++ b/tests/preprocessor_tests.cpp @@ -0,0 +1,62 @@ +/** + * @file preprocessor_tests.cpp + * @author Kartik Dutt + * + * Tests for various functionalities of PreProcessor class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#define BOOST_TEST_DYN_LINK +#include +#include +#include + +using namespace boost::unit_test; + +BOOST_AUTO_TEST_SUITE(PreProcessorsTest); + +BOOST_AUTO_TEST_CASE(YOLOPreProcessor) +{ + arma::field input; + input.set_size(1, 1); + + arma::vec bBox(5); + bBox << 2 << 84 << 48 << 493 << 387 << arma::endr; + input(0, 0) = bBox; + arma::mat output; + + // Single input check. + PreProcessor>::YOLOPreProcessor( + input, output, 1, 500, 387); + BOOST_REQUIRE_CLOSE(arma::accu(output), 8.3342, 1e-3); + + input.clear(); + input.set_size(1, 3); + input(0, 0) = bBox; + + // Multiple bounding boxes check. + bBox.clear(); + bBox.set_size(15); + bBox << 8 << 341 << 217 << 487 << 375 << 8 << 114 << 209 << 183 << + 298 << 19 << 237 << 110 << 320 << 176 << arma::endr; + input(0, 1) = bBox; + + bBox.clear(); + bBox.set_size(5); + bBox << 7 << 157 << 90 << 486 << 372 << arma::endr; + input(0, 2) = bBox; + + PreProcessor>::YOLOPreProcessor( + input, output, 1, 500, 387); + + arma::vec desiredSum(3); + desiredSum << 8.3342 << 18.4093 << 7.13195 << arma::endr; + for (size_t i = 0; i < output.n_cols; i++) + BOOST_REQUIRE_CLOSE(arma::accu(output.col(i)), desiredSum(i), 1e-3); +} + +BOOST_AUTO_TEST_SUITE_END(); From c77971640b60ee4453008daa345c265829f7700c Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sat, 15 Aug 2020 22:01:07 +0530 Subject: [PATCH 2/9] Fix YOLOv3 implementation for preprocessor --- dataloader/preprocessor.hpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index 724ad96f..17de8650 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -162,6 +162,11 @@ class PreProcessor (i + 1) * 5 - 1)); } + // For YOLOv2 or higher, each bounding box can represent a class so we don't + // Repeat labels as done for YOLOv1. We will use map to store last inserted + // bounding box. + unordered_map, int> boundingBoxOffset; + // Normalize the coordinates. boundingBoxes.row(0) /= imageWidth; boundingBoxes.row(2) /= imageWidth; @@ -211,7 +216,19 @@ class PreProcessor } else { - size_t bBoxOffset = (5 + numClasses) * i; + size_t s = 0; + if (boundingBoxOffset.count({gridX, gridY})) + { + s = boundingBoxOffset[{gridX, gridY}] + 1; + boundingBoxOffset[{gridX, gridY}]++; + } + else + boundingBoxOffset.insert({{gridX, gridY}, s}); + + if (s > numBoxes) + continue; + + size_t bBoxOffset = (5 + numClasses) * s; outputTemp(arma::span(gridX), arma::span(gridY), arma::span(bBoxOffset, bBoxOffset + 1)) = gridCoordinates; outputTemp(arma::span(gridX), arma::span(gridY), From 3b4f17644bc1abc25dbccb5a5ceb427f444a8126 Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sat, 15 Aug 2020 22:08:59 +0530 Subject: [PATCH 3/9] don't use int and unordered_map --- dataloader/preprocessor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index 17de8650..007849a3 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -165,7 +165,7 @@ class PreProcessor // For YOLOv2 or higher, each bounding box can represent a class so we don't // Repeat labels as done for YOLOv1. We will use map to store last inserted // bounding box. - unordered_map, int> boundingBoxOffset; + std::map, size_t> boundingBoxOffset; // Normalize the coordinates. boundingBoxes.row(0) /= imageWidth; From 90d7e34b67f47ba436265539a1117aad653825c1 Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sun, 16 Aug 2020 12:57:13 +0530 Subject: [PATCH 4/9] Completed everything and added tests,TODO: add remaining suggestions --- tests/preprocessor_tests.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/preprocessor_tests.cpp b/tests/preprocessor_tests.cpp index 7fbab984..40dc2c8e 100644 --- a/tests/preprocessor_tests.cpp +++ b/tests/preprocessor_tests.cpp @@ -57,6 +57,12 @@ BOOST_AUTO_TEST_CASE(YOLOPreProcessor) desiredSum << 8.3342 << 18.4093 << 7.13195 << arma::endr; for (size_t i = 0; i < output.n_cols; i++) BOOST_REQUIRE_CLOSE(arma::accu(output.col(i)), desiredSum(i), 1e-3); + + desiredSum << 4.6671 << 10.70465 << 4.065975 << arma::endr; + PreProcessor>::YOLOPreProcessor( + input, output, 3, 500, 387); + for (size_t i = 0; i < output.n_cols; i++) + BOOST_REQUIRE_CLOSE(arma::accu(output.col(i)), desiredSum(i), 1e-3); } BOOST_AUTO_TEST_SUITE_END(); From 2f2d25ac45b0beb78111ec8c49336bde3d578ff5 Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sun, 16 Aug 2020 12:59:39 +0530 Subject: [PATCH 5/9] Style fixes --- dataloader/preprocessor.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index 007849a3..838faa49 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -162,9 +162,9 @@ class PreProcessor (i + 1) * 5 - 1)); } - // For YOLOv2 or higher, each bounding box can represent a class so we don't - // Repeat labels as done for YOLOv1. We will use map to store last inserted - // bounding box. + // For YOLOv2 or higher, each bounding box can represent a class + // so we don't repeat labels as done for YOLOv1. We will use map + // to store last inserted bounding box. std::map, size_t> boundingBoxOffset; // Normalize the coordinates. @@ -232,7 +232,8 @@ class PreProcessor outputTemp(arma::span(gridX), arma::span(gridY), arma::span(bBoxOffset, bBoxOffset + 1)) = gridCoordinates; outputTemp(arma::span(gridX), arma::span(gridY), - arma::span(bBoxOffset + 2, bBoxOffset + 3)) = widthAndHeight.col(i); + arma::span(bBoxOffset + 2, + bBoxOffset + 3)) = widthAndHeight.col(i); outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0; outputTemp(gridX, gridY, bBoxOffset + 5 + labels.col(i)(0)) = 1; } From 8739797719236b70e21bd50682878a022eca74a0 Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sun, 16 Aug 2020 13:40:30 +0530 Subject: [PATCH 6/9] Make normalization optional --- dataloader/preprocessor.hpp | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index 838faa49..c3d1de76 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -108,6 +108,8 @@ class PreProcessor * @param gridHeight Height of output feature map of YOLO model. * @param numBoxes Number of bounding boxes per grid. * @param numClasses Number of classes in training set. + * @param normalize Boolean to determine whether coordinates are to + * to be normalized or not. Defaults to true. * * Note : This function must be called manually before model is used. */ @@ -120,7 +122,8 @@ class PreProcessor const size_t gridWidth = 7, const size_t gridHeight = 7, const size_t numBoxes = 2, - const size_t numClasses = 20) + const size_t numClasses = 20, + const bool normalize = true) { // See if we can change this to v4 / v5. mlpack::Log::Assert(version >= 1 && version <= 3, "Supported YOLO versions \ @@ -187,8 +190,19 @@ class PreProcessor { // Index for representing bounding box on grid. arma::vec gridCoordinates = centres.col(i); - gridCoordinates(0) = std::ceil(gridCoordinates(0) / cellSizeWidth) - 1; - gridCoordinates(1) = std::ceil(gridCoordinates(1) / cellSizeHeight) - 1; + arma::vec centreCoordinates = centres.col(i); + + if (normalize) + { + gridCoordinates(0) = std::ceil(gridCoordinates(0) / cellSizeWidth) - 1; + gridCoordinates(1) = std::ceil(gridCoordinates(1) / cellSizeHeight) - 1; + } + else + { + gridCoordinates(0) = std::ceil((gridCoordinates(0) / imageWidth) / cellSizeWidth) - 1; + gridCoordinates(1) = std::ceil((gridCoordinates(1) / imageHeight) / cellSizeHeight) - 1; + } + size_t gridX = gridCoordinates(0); size_t gridY = gridCoordinates(1); @@ -200,6 +214,9 @@ class PreProcessor gridCoordinates(0) /= cellSizeWidth; gridCoordinates(1) /= cellSizeHeight; + if (normalize) + centreCoordinates = gridCoordinates; + if (version == 1) { // Fill elements in the grid. @@ -207,7 +224,7 @@ class PreProcessor { size_t s = 5 * k; outputTemp(arma::span(gridX), arma::span(gridY), - arma::span(s, s + 1)) = gridCoordinates; + arma::span(s, s + 1)) = centreCoordinates; outputTemp(arma::span(gridX), arma::span(gridY), arma::span(s + 2, s + 3)) = widthAndHeight.col(i); outputTemp(gridX, gridY, s + 4) = 1.0; @@ -230,7 +247,7 @@ class PreProcessor size_t bBoxOffset = (5 + numClasses) * s; outputTemp(arma::span(gridX), arma::span(gridY), - arma::span(bBoxOffset, bBoxOffset + 1)) = gridCoordinates; + arma::span(bBoxOffset, bBoxOffset + 1)) = centreCoordinates; outputTemp(arma::span(gridX), arma::span(gridY), arma::span(bBoxOffset + 2, bBoxOffset + 3)) = widthAndHeight.col(i); From 80e0c946b268f8fdfa4f50b1270aca4f4df4c38a Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sun, 16 Aug 2020 13:47:11 +0530 Subject: [PATCH 7/9] Another style fix --- dataloader/preprocessor.hpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index c3d1de76..f9b49e00 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -194,15 +194,19 @@ class PreProcessor if (normalize) { - gridCoordinates(0) = std::ceil(gridCoordinates(0) / cellSizeWidth) - 1; - gridCoordinates(1) = std::ceil(gridCoordinates(1) / cellSizeHeight) - 1; + gridCoordinates(0) = std::ceil(gridCoordinates(0) / + cellSizeWidth) - 1; + gridCoordinates(1) = std::ceil(gridCoordinates(1) / + cellSizeHeight) - 1; } else { - gridCoordinates(0) = std::ceil((gridCoordinates(0) / imageWidth) / cellSizeWidth) - 1; - gridCoordinates(1) = std::ceil((gridCoordinates(1) / imageHeight) / cellSizeHeight) - 1; + gridCoordinates(0) = std::ceil((gridCoordinates(0) / + imageWidth) / cellSizeWidth) - 1; + gridCoordinates(1) = std::ceil((gridCoordinates(1) / + imageHeight) / cellSizeHeight) - 1; } - + size_t gridX = gridCoordinates(0); size_t gridY = gridCoordinates(1); From 1b771205fbb14d680cf693c7d9093c2ad8d3766d Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Sun, 16 Aug 2020 18:13:01 +0530 Subject: [PATCH 8/9] Complete tests --- tests/preprocessor_tests.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/preprocessor_tests.cpp b/tests/preprocessor_tests.cpp index 40dc2c8e..2bd1b214 100644 --- a/tests/preprocessor_tests.cpp +++ b/tests/preprocessor_tests.cpp @@ -63,6 +63,41 @@ BOOST_AUTO_TEST_CASE(YOLOPreProcessor) input, output, 3, 500, 387); for (size_t i = 0; i < output.n_cols; i++) BOOST_REQUIRE_CLOSE(arma::accu(output.col(i)), desiredSum(i), 1e-3); + + + // For better unit testing, we create a very small output grid of size + // numBoxes * 5 + numClasses, where numBoxes = 1, numClasses = 2. + // The grid width and height will be 2 x 2. Hence, for + // single input label, target map will be of size 1 x 2 x 2 x 7. + input.clear(); + input.set_size(1, 1); + bBox.clear(); + bBox.set_size(5); + bBox << 0 << 157 << 90 << 486 << 300 << arma::endr; + input(0, 0) = bBox; + + PreProcessor>::YOLOPreProcessor( + input, output, 1, 500, 387, 2, 2, 1, 2); + + arma::mat desiredOutput(2 * 2 * 7, 1); + desiredOutput.zeros(); + // To convert 4d Tensor to 1D array use tensor.numpy().ravel(). + desiredOutput(3) = 0.2860; + desiredOutput(7) = 0.0078; + desiredOutput(11) = 0.6580; + desiredOutput(15) = 0.5426; + desiredOutput(19) = 1.0; + desiredOutput(23) = 1.0; + + // check for each value in matrix. + double tolerance = 1e-1; + for (size_t i = 0; i < output.n_elem; i++) + { + if (std::abs(output(i)) < tolerance / 2) + BOOST_REQUIRE_SMALL(desiredOutput(i), tolerance / 2); + else + BOOST_REQUIRE_CLOSE(desiredOutput(i), output(i), 1e-2); + } } BOOST_AUTO_TEST_SUITE_END(); From 7275abfe21120656fa4b5fb52def23bd8f4c901e Mon Sep 17 00:00:00 2001 From: kartikdutt18 Date: Thu, 27 Aug 2020 19:22:22 +0530 Subject: [PATCH 9/9] style fixes --- dataloader/preprocessor.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp index f9b49e00..6454df41 100644 --- a/dataloader/preprocessor.hpp +++ b/dataloader/preprocessor.hpp @@ -100,7 +100,7 @@ class PreProcessor * the size : gridWidth * gridHeight * (5 * numBoxes + classes). * * @param annotations Field object created using model's dataloader containing - * annotation for images. + * annotation for images. * @param output Output matrix where output will be stored. * @param imageWidth Width of image used for training YOLO model. * @param imageHeight Height of image used for training YOLO model. @@ -152,7 +152,7 @@ class PreProcessor for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++) { arma::cube outputTemp(const_cast &>(output).memptr() + - offset, gridHeight, gridWidth, numPredictions, false, false); + offset, gridHeight, gridWidth, numPredictions, false, false); offset += gridWidth * gridHeight * numPredictions; // Get the bounding box and labels corresponding to current image. @@ -166,8 +166,8 @@ class PreProcessor } // For YOLOv2 or higher, each bounding box can represent a class - // so we don't repeat labels as done for YOLOv1. We will use map - // to store last inserted bounding box. + // so we don't repeat labels as done for YOLOv1. We will use map + // to store last inserted bounding box. std::map, size_t> boundingBoxOffset; // Normalize the coordinates. @@ -207,7 +207,6 @@ class PreProcessor imageHeight) / cellSizeHeight) - 1; } - size_t gridX = gridCoordinates(0); size_t gridY = gridCoordinates(1); gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth;