-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsolution_pose_estimation.cpp
More file actions
168 lines (136 loc) · 4.91 KB
/
solution_pose_estimation.cpp
File metadata and controls
168 lines (136 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#include "solution_pose_estimation.h"
#include "ar_example.h"
#include "homography_pose_estimator.h"
#include "plane_world_model.h"
#include "pnp_pose_estimator.h"
#include "moba_pose_estimator.h"
#include "scene_3d.h"
#include "opencv2/calib3d.hpp"
#include "opencv2/core/eigen.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/videoio.hpp"
#include <chrono>
// Shorthand aliases for the <chrono> timing tools used to measure
// per-frame processing durations below (milliseconds as double).
using Clock = std::chrono::high_resolution_clock;
using DurationInMs = std::chrono::duration<double, std::milli>;
// Convenient shorthand for 5-element distortion-coefficient vectors,
// injected into the cv namespace alongside OpenCV's other Vec aliases.
namespace cv
{
using Vec5d = Vec<double, 5>;
}
// Camera model data.
// The calibration matrix K is stored both as an Eigen matrix and as an
// OpenCV matrix, since both representations are needed frequently.
struct CameraModel
{
  Eigen::Matrix3d K;         // Calibration matrix (Eigen form).
  cv::Matx33d K_cv;          // Calibration matrix (OpenCV form).
  cv::Vec5d dist_coeffs_cv;  // Distortion coefficients (OpenCV ordering).
  cv::Size2i img_size;       // Image size in pixels.

  // Principal point (u0, v0) extracted from K.
  Eigen::Vector2d principalPoint() const
  {
    return Eigen::Vector2d{K(0, 2), K(1, 2)};
  }

  // Focal lengths (fu, fv) extracted from K.
  Eigen::Vector2d focalLengths() const
  {
    return Eigen::Vector2d{K(0, 0), K(1, 1)};
  }
};
// Creates the camera model with the intrinsic parameters for the webcam:
// calibration matrix (in both Eigen and OpenCV form), distortion
// coefficients and image size.
CameraModel setupCameraModel()
{
  CameraModel model;

  // Calibration matrix K (parameters for the webcam).
  model.K << 6.6051081297156020e+02, 0., 3.1810845757653777e+02,
             0., 6.6051081297156020e+02, 2.3995332228230293e+02,
             0., 0., 1.;

  // OpenCV copy of K for convenience in the OpenCV-based routines.
  cv::eigen2cv(model.K, model.K_cv);

  // Distortion coefficients [k1, k2, 0, 0, k3].
  model.dist_coeffs_cv = cv::Vec5d{
      0., 2.2202255011309072e-01, 0., 0., -5.0348071005413975e-01};

  // Image size in pixels.
  model.img_size = cv::Size2i{640, 480};

  return model;
}
// Constructs the planar world model from the printed "world" image.
// Throws std::runtime_error if the image file cannot be read.
PlaneWorldModel createWorldModel()
{
  // Read the "world" image corresponding to the chosen paper size.
  //const std::string image_path{"../world_A4.png"};
  const std::string image_path{"../world_A3.png"};

  const cv::Mat world_image = cv::imread(image_path);
  if (world_image.empty())
  {
    throw std::runtime_error{"Could not find: " + image_path};
  }

  // Physical paper size in meters.
  // Choose the paper size you have used.
  //const cv::Size2d a4_size{0.297, 0.210};
  const cv::Size2d a3_size{0.420, 0.297};

  // Grid size in meters.
  // This will be the physical size of the axes in the visualization.
  //const double a4_grid_size{0.025};
  const double a3_grid_size{0.040};

  return PlaneWorldModel{world_image, a3_size, a3_grid_size};
}
// Runs the full pose-estimation demo:
// opens the camera stream, and for every frame estimates the camera pose
// relative to the planar world model, updating both the AR overlay and
// the 3D visualization until a key is pressed or the 3D viewer is closed.
// Throws std::runtime_error if the camera cannot be opened or the feed is lost.
void runPoseEstimationSolution()
{
  // Get camera model parameters.
  const CameraModel camera_model = setupCameraModel();

  // Construct plane world model.
  const PlaneWorldModel world = createWorldModel();

  // Construct pose estimator:
  // a homography-based estimator provides the initial pose estimate,
  // which is then refined by the motion-only BA estimator.
  auto init_estimator = std::make_shared<HomographyPoseEstimator>(camera_model.K);
  MobaPoseEstimator pose_estimator(init_estimator, camera_model.principalPoint(), camera_model.focalLengths());

  // Construct AR visualizer.
  ARExample ar_example(world.gridSize());

  // Construct 3D visualizer.
  Scene3D scene_3D{world};

  // Setup camera stream.
  const int camera_id = 0; // Should be 0 or 1 on the lab PCs.
  cv::VideoCapture cap(camera_id);

  // Fail fast: check the capture opened BEFORE configuring it.
  // (The original checked after cap.set(), so a failed open was reported
  // late and the stream was configured while closed.)
  if (!cap.isOpened())
  {
    throw std::runtime_error("Could not open camera " + std::to_string(camera_id));
  }
  cap.set(cv::CAP_PROP_FRAME_WIDTH, camera_model.img_size.width);
  cap.set(cv::CAP_PROP_FRAME_HEIGHT, camera_model.img_size.height);

  for (;;)
  {
    // Read a frame from the camera.
    cv::Mat frame;
    cap >> frame;
    if (frame.empty())
    {
      throw std::runtime_error("Lost camera feed");
    }

    // Undistort the frame so the pinhole model with K applies directly.
    cv::Mat undistorted_frame;
    cv::undistort(frame, undistorted_frame, camera_model.K_cv, camera_model.dist_coeffs_cv);
    cv::Mat gray_frame;
    cv::cvtColor(undistorted_frame, gray_frame, cv::COLOR_BGR2GRAY);

    // Find the correspondences between the detected image points and the
    // world points. Measure how long the matching takes.
    auto start = Clock::now();
    std::vector<cv::Point2f> matched_image_points;
    std::vector<cv::Point3f> matched_world_points;
    world.findCorrespondences(gray_frame, matched_image_points, matched_world_points);
    auto end = Clock::now();
    DurationInMs correspondence_matching_duration = end - start;

    // Update the pose estimate.
    // Measure how long the estimation takes.
    start = Clock::now();
    PoseEstimate estimate = pose_estimator.estimate(matched_image_points, matched_world_points);
    end = Clock::now();
    DurationInMs pose_estimation_duration = end - start;

    // Update the Augmented Reality visualization.
    ar_example.update(undistorted_frame, estimate, camera_model.K,
                      correspondence_matching_duration.count(),
                      pose_estimation_duration.count());

    // Update the 3D visualization.
    const auto was_stopped = scene_3D.update(undistorted_frame, estimate, camera_model.K);

    // Exit on any key press, or when the 3D viewer was closed.
    if (cv::pollKey() >= 0 || was_stopped)
    {
      break;
    }
  }
}