Merge pull request #25 from kartikdutt18/YOLOPreProcessor

Add YOLO PreProcessor
mlpack · Sep 11, 2020 · 27c4814 · 27c4814
2 parents 5bb3309 + 7275abf
commit 27c4814
Show file tree

Hide file tree

Showing 3 changed files with 272 additions and 1 deletion.
diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp
@@ -1,5 +1,5 @@
 /**
- * @file dataloader.hpp
+ * @file preprocessor.hpp
  * @author Kartik Dutt
  * 
  * Definition of PreProcessor class for popular datasets.
@@ -93,6 +93,173 @@ class PreProcessor
           trainFeatures(i) = ((uint8_t)(trainFeatures(i)) / 255.0);
     }
   }
+
+  /**
+   * PreProcessor for YOLO model. Converts arma::field type annotations to
+   * arma::mat type for training YOLO model. Each column in target matrix has
+   * the size : gridWidth * gridHeight * numPredictions, where numPredictions
+   * is (5 * numBoxes + numClasses) for version 1 and
+   * numBoxes * (5 + numClasses) for versions 2 and 3.
+   *
+   * Each annotation vector is read in groups of five values: a class label
+   * followed by four box coordinates. The first and third coordinates are
+   * divided by imageWidth, the second and fourth by imageHeight; the box
+   * width and height are taken as (third - first) and (fourth - second), and
+   * the box centre as the midpoint of each pair.
+   *
+   * @tparam eT Element type of the output matrix.
+   * @param annotations Field object created using model's dataloader containing
+   *     annotation for images. One column per image.
+   * @param output Output matrix where output will be stored. It is resized
+   *     and zero-filled by this function.
+   * @param version YOLO version whose target layout is produced. Versions
+   *     1 to 3 are supported. Defaults to 1.
+   * @param imageWidth Width of image used for training YOLO model.
+   * @param imageHeight Height of image used for training YOLO model.
+   * @param gridWidth Width of output feature map of YOLO model.
+   * @param gridHeight Height of output feature map of YOLO model.
+   * @param numBoxes Number of bounding boxes per grid.
+   * @param numClasses Number of classes in training set.
+   * @param normalize If true, the stored centre is the offset of the box
+   *     centre inside its grid cell, scaled by the cell size. If false, the
+   *     stored centre is the centre relative to the whole image.
+   *     Defaults to true.
+   *
+   * Note : This function must be called manually before model is used.
+   */
+  template<typename eT>
+  static void YOLOPreProcessor(const DatasetY& annotations,
+                               arma::Mat<eT>& output,
+                               const size_t version = 1,
+                               const size_t imageWidth = 224,
+                               const size_t imageHeight = 224,
+                               const size_t gridWidth = 7,
+                               const size_t gridHeight = 7,
+                               const size_t numBoxes = 2,
+                               const size_t numClasses = 20,
+                               const bool normalize = true)
+  {
+    // See if we can change this to v4 / v5.
+    mlpack::Log::Assert(version >= 1 && version <= 3,
+        "Supported YOLO versions are version 1 to version 3.");
+
+    mlpack::Log::Assert(typeid(annotations) == typeid(arma::field<arma::vec>),
+        "Use Field type to represent annotations.");
+
+    const size_t batchSize = annotations.n_cols;
+
+    // Version 1 stores one class vector per cell; later versions store one
+    // class vector per bounding box.
+    const size_t numPredictions = (version == 1) ?
+        5 * numBoxes + numClasses : numBoxes * (5 + numClasses);
+    const size_t columnSize = gridWidth * gridHeight * numPredictions;
+
+    const double cellSizeHeight = 1.0 / gridHeight;
+    const double cellSizeWidth = 1.0 / gridWidth;
+
+    // Set size of output and use cubes convenience.
+    output.set_size(columnSize, batchSize);
+    output.zeros();
+
+    for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++)
+    {
+      // Alias the current column of output as a cube without copying. The
+      // cube is indexed below as (gridX, gridY, prediction), so its first two
+      // dimensions must be (gridWidth, gridHeight).
+      arma::Cube<eT> outputTemp(output.memptr() + boxIdx * columnSize,
+          gridWidth, gridHeight, numPredictions, false, false);
+
+      // Get the bounding box and labels corresponding to current image.
+      const size_t numObjects = annotations(0, boxIdx).n_elem / 5;
+      arma::mat labels(1, numObjects);
+      arma::mat boundingBoxes(4, numObjects);
+      for (size_t i = 0; i < numObjects; i++)
+      {
+        labels(0, i) = annotations(0, boxIdx)(i * 5);
+        boundingBoxes.col(i) = annotations(0, boxIdx)(arma::span(i * 5 + 1,
+            (i + 1) * 5 - 1));
+      }
+
+      // For YOLOv2 or higher, each bounding box can represent a class
+      // so we don't repeat labels as done for YOLOv1. This map counts how
+      // many boxes have already been assigned to each grid cell.
+      std::map<std::pair<size_t, size_t>, size_t> boxesInCell;
+
+      // Normalize the coordinates.
+      boundingBoxes.row(0) /= imageWidth;
+      boundingBoxes.row(2) /= imageWidth;
+      boundingBoxes.row(1) /= imageHeight;
+      boundingBoxes.row(3) /= imageHeight;
+
+      // Get width and height as well as centres for the bounding box.
+      arma::mat widthAndHeight(2, numObjects);
+      widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0));
+      widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1));
+
+      arma::mat centres(2, numObjects);
+      centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0;
+      centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0;
+
+      // Assign bounding boxes to the grid.
+      for (size_t i = 0; i < numObjects; i++)
+      {
+        // Index for representing bounding box on grid.
+        arma::vec gridCoordinates = centres.col(i);
+        arma::vec centreCoordinates = centres.col(i);
+
+        if (normalize)
+        {
+          gridCoordinates(0) = std::ceil(gridCoordinates(0) /
+              cellSizeWidth) - 1;
+          gridCoordinates(1) = std::ceil(gridCoordinates(1) /
+              cellSizeHeight) - 1;
+        }
+        else
+        {
+          // NOTE(review): centres were already divided by the image size
+          // above, so this divides a second time - confirm this is intended.
+          gridCoordinates(0) = std::ceil((gridCoordinates(0) /
+              imageWidth) / cellSizeWidth) - 1;
+          gridCoordinates(1) = std::ceil((gridCoordinates(1) /
+              imageHeight) / cellSizeHeight) - 1;
+        }
+
+        // A centre lying exactly on the left / top edge gives ceil(0) - 1,
+        // i.e. -1, which must not be converted to an unsigned index. Such a
+        // box belongs to the first cell.
+        if (gridCoordinates(0) < 0)
+          gridCoordinates(0) = 0;
+        if (gridCoordinates(1) < 0)
+          gridCoordinates(1) = 0;
+
+        const size_t gridX = static_cast<size_t>(gridCoordinates(0));
+        const size_t gridY = static_cast<size_t>(gridCoordinates(1));
+        gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth;
+        gridCoordinates(1) = gridCoordinates(1) * cellSizeHeight;
+
+        // Offset of the centre from the cell origin, in units of one cell.
+        gridCoordinates = centres.col(i) - gridCoordinates;
+        gridCoordinates(0) /= cellSizeWidth;
+        gridCoordinates(1) /= cellSizeHeight;
+
+        if (normalize)
+          centreCoordinates = gridCoordinates;
+
+        // Convert to the element type of output before writing into the cube.
+        const arma::Col<eT> centreOut =
+            arma::conv_to<arma::Col<eT>>::from(centreCoordinates);
+        const arma::Col<eT> sizeOut =
+            arma::conv_to<arma::Col<eT>>::from(widthAndHeight.col(i));
+        const size_t classIdx = static_cast<size_t>(labels(0, i));
+
+        if (version == 1)
+        {
+          // Fill elements in the grid: every box slot of the cell receives
+          // the same box, followed by a single one-hot class entry.
+          for (size_t k = 0; k < numBoxes; k++)
+          {
+            const size_t s = 5 * k;
+            outputTemp(arma::span(gridX), arma::span(gridY),
+                arma::span(s, s + 1)) = centreOut;
+            outputTemp(arma::span(gridX), arma::span(gridY),
+                arma::span(s + 2, s + 3)) = sizeOut;
+            outputTemp(gridX, gridY, s + 4) = 1.0;
+          }
+          outputTemp(gridX, gridY, 5 * numBoxes + classIdx) = 1;
+        }
+        else
+        {
+          // Next free box slot of this cell (0 for the first box seen).
+          const size_t s = boxesInCell[{gridX, gridY}]++;
+
+          // Valid slots are 0 .. numBoxes - 1; any further box in this cell
+          // is dropped, since writing it would run past the last slice.
+          if (s >= numBoxes)
+            continue;
+
+          const size_t bBoxOffset = (5 + numClasses) * s;
+          outputTemp(arma::span(gridX), arma::span(gridY),
+              arma::span(bBoxOffset, bBoxOffset + 1)) = centreOut;
+          outputTemp(arma::span(gridX), arma::span(gridY),
+              arma::span(bBoxOffset + 2, bBoxOffset + 3)) = sizeOut;
+          outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0;
+          outputTemp(gridX, gridY, bBoxOffset + 5 + classIdx) = 1;
+        }
+      }
+    }
+  }
 };
 
 #endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -10,6 +10,7 @@ add_executable(models_test
                augmentation_tests.cpp
                ffn_model_tests.cpp
                dataloader_tests.cpp
+               preprocessor_tests.cpp
                utils_tests.cpp
 )
 

diff --git a/tests/preprocessor_tests.cpp b/tests/preprocessor_tests.cpp
@@ -0,0 +1,103 @@
+/**
+ * @file preprocessor_tests.cpp
+ * @author Kartik Dutt
+ *
+ * Tests for various functionalities of PreProcessor class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+
+#define BOOST_TEST_DYN_LINK
+#include <dataloader/preprocessor.hpp>
+#include <dataloader/dataloader.hpp>
+#include <boost/test/unit_test.hpp>
+
+using namespace boost::unit_test;
+
+BOOST_AUTO_TEST_SUITE(PreProcessorsTest);
+
+BOOST_AUTO_TEST_CASE(YOLOPreProcessor)
+{
+  typedef PreProcessor<arma::mat, arma::field<arma::vec>> Processor;
+
+  // Compares the sum of every output column against the expected sum.
+  auto requireColumnSums = [](const arma::mat& result,
+                              const arma::vec& expectedSums)
+  {
+    for (size_t col = 0; col < result.n_cols; col++)
+      BOOST_REQUIRE_CLOSE(arma::accu(result.col(col)), expectedSums(col), 1e-3);
+  };
+
+  // Annotations are groups of (label, four box coordinates).
+  const arma::vec firstBox = {2, 84, 48, 493, 387};
+  arma::mat output;
+
+  // Single input check.
+  arma::field<arma::vec> single(1, 1);
+  single(0, 0) = firstBox;
+  Processor::YOLOPreProcessor(single, output, 1, 500, 387);
+  BOOST_REQUIRE_CLOSE(arma::accu(output), 8.3342, 1e-3);
+
+  // Multiple bounding boxes check: the second image carries three boxes.
+  arma::field<arma::vec> batch(1, 3);
+  batch(0, 0) = firstBox;
+  batch(0, 1) = arma::vec({8, 341, 217, 487, 375, 8, 114, 209, 183,
+      298, 19, 237, 110, 320, 176});
+  batch(0, 2) = arma::vec({7, 157, 90, 486, 372});
+
+  // Version 1 target layout.
+  Processor::YOLOPreProcessor(batch, output, 1, 500, 387);
+  requireColumnSums(output, arma::vec({8.3342, 18.4093, 7.13195}));
+
+  // Version 3 target layout on the same annotations.
+  Processor::YOLOPreProcessor(batch, output, 3, 500, 387);
+  requireColumnSums(output, arma::vec({4.6671, 10.70465, 4.065975}));
+
+  // For better unit testing, we create a very small output grid of size
+  // numBoxes * 5 + numClasses, where numBoxes = 1, numClasses = 2.
+  // The grid width and height will be 2 x 2. Hence, for
+  // single input label, target map will be of size 1 x 2 x 2 x 7.
+  arma::field<arma::vec> tiny(1, 1);
+  tiny(0, 0) = arma::vec({0, 157, 90, 486, 300});
+  Processor::YOLOPreProcessor(tiny, output, 1, 500, 387, 2, 2, 1, 2);
+
+  // To convert 4d Tensor to 1D array use tensor.numpy().ravel().
+  arma::mat desiredOutput(2 * 2 * 7, 1, arma::fill::zeros);
+  desiredOutput(3) = 0.2860;
+  desiredOutput(7) = 0.0078;
+  desiredOutput(11) = 0.6580;
+  desiredOutput(15) = 0.5426;
+  desiredOutput(19) = 1.0;
+  desiredOutput(23) = 1.0;
+
+  // Check each value: entries that are (almost) zero in the output must be
+  // (almost) zero in the expectation, everything else must match closely.
+  const double tolerance = 1e-1;
+  for (size_t idx = 0; idx < output.n_elem; idx++)
+  {
+    const bool nearZero = std::abs(output(idx)) < tolerance / 2;
+    if (nearZero)
+      BOOST_REQUIRE_SMALL(desiredOutput(idx), tolerance / 2);
+    else
+      BOOST_REQUIRE_CLOSE(desiredOutput(idx), output(idx), 1e-2);
+  }
+}
+
+BOOST_AUTO_TEST_SUITE_END();