diff --git a/SIFTWrapper.py b/SIFTWrapper.py
new file mode 100644
index 0000000..6a56b2c
--- /dev/null
+++ b/SIFTWrapper.py
@@ -0,0 +1,52 @@
+import cv2
+import numpy as np
+
+
+class SIFTWrapper:
+
+    def __init__(self, im1, im2):
+
+        self.im1 = im1
+        self.im2 = im2
+
+        self.kps = []
+        self.descs = []
+
+    def compute_keypoints(self):
+
+        sift = cv2.xfeatures2d.SIFT_create(contrastThreshold=0.04, edgeThreshold=10, sigma=1.6)
+        kp1, des1 = sift.detectAndCompute(self.im1, None)
+        kp2, des2 = sift.detectAndCompute(self.im2, None)
+
+        self.kps.append(kp1)
+        self.kps.append(kp2)
+
+        self.descs.append(des1)
+        self.descs.append(des2)
+
+        return kp1, des1, kp2, des2
+
+    def compute_matches(self, des1, des2):
+
+        matcher = cv2.BFMatcher()
+        return matcher.knnMatch(des1, des2, k=2)
+
+    def compute_best_matches(self, r=0.7):
+
+        kp1, des1, kp2, des2 = self.compute_keypoints()
+        matches = self.compute_matches(des1, des2)
+
+        good_matches = []
+        for m, n in matches:
+
+            # Lowe's ratio test: keep the best match m only if it clearly beats the second-best match n
+            if m.distance < r * n.distance:
+                good_matches.append(m)
+
+        u1 = []
+        u2 = []
+        for match in good_matches:
+            u1.append(kp1[match.queryIdx].pt)
+            u2.append(kp2[match.trainIdx].pt)
+
+        return np.asarray(u1), np.asarray(u2)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..fc2643b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,326 @@
+import sys
+
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image, ExifTags
+from scipy.optimize import least_squares
+
+from SIFTWrapper import SIFTWrapper
+
+# Need this to register the 3d projection with matplotlib
+from mpl_toolkits.mplot3d import axes3d
+
+
+def triangulate(P0, P1, x1, x2):
+
+    # P0, P1: 3x4 projection matrices for each of two cameras/images
+    # x1, x2: corresponding points in each of two images (if using a P that has
+    # been scaled by K, use pixel coordinates; otherwise use generalized coordinates)
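+    # Each image point contributes two rows of the cross-product constraint
+    # x × (P @ X) = 0: for a view P with measurement x, the rows are
+    # x[0] * P[2] - P[0] and x[1] * P[2] - P[1]. The homogeneous point X that
+    # minimizes ||A @ X|| subject to ||X|| = 1 is the right singular vector of
+    # A with the smallest singular value, i.e. the last row of vt.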
+    A = np.array([x1[0] * P0[2] - P0[0],
+                  x1[1] * P0[2] - P0[1],
+                  x2[0] * P1[2] - P1[0],
+                  x2[1] * P1[2] - P1[1]])
+
+    u, s, vt = np.linalg.svd(A)
+
+    return vt[-1]
+
+
+def intrinsic_cam_mtx(f, cu, cv):
+
+    return np.asarray([[f, 0, cu],
+                       [0, f, cv],
+                       [0, 0, 1]])
+
+
+def plot_matches(img1, u1, img2, u2, skip=10):
+
+    h = img1.shape[0]
+    w = img1.shape[1]
+
+    fig = plt.figure(figsize=(12, 12))
+    I_new = np.zeros((h, 2 * w, 3)).astype(int)
+    I_new[:, :w, :] = img1
+    I_new[:, w:, :] = img2
+    plt.imshow(I_new)
+    plt.scatter(u1[::skip, 0], u1[::skip, 1])
+    plt.scatter(u2[::skip, 0] + w, u2[::skip, 1])
+    [plt.plot([p1[0], p2[0] + w], [p1[1], p2[1]]) for p1, p2 in zip(u1[::skip], u2[::skip])]
+    plt.show()
+
+
+# TODO: switch to piexif
+def get_intrinsic_params(img):
+
+    # Get the relevant exif data
+    exif = {ExifTags.TAGS[k]: v for k, v in img._getexif().items() if k in ExifTags.TAGS}
+
+    f_length_35 = int(exif['FocalLengthIn35mmFilm'])
+    img = np.asarray(img)
+    h, w, d = img.shape
+
+    # Convert the 35mm-equivalent focal length to pixels (35mm film is 36mm wide)
+    f_length = round(f_length_35 / 36 * w, 4)
+    # Assume the principal point sits at the image center
+    principal_point = (w // 2, h // 2)
+
+    # print("focal length:", f_length)
+    # print("principal point:", principal_point, '\n')
+
+    return f_length, principal_point
+
+
+def get_inliers(u1, u2, K):
+
+    # Make homogeneous
+    u1 = np.column_stack((u1, np.ones(shape=(u1.shape[0], 1))))
+    u2 = np.column_stack((u2, np.ones(shape=(u2.shape[0], 1))))
+
+    K_inv = np.linalg.inv(K)
+
+    x1 = u1 @ K_inv.T
+    x2 = u2 @ K_inv.T
+
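+    # Because the matches were normalized by K^-1 above, the RANSAC threshold
+    # below (1e-3) is expressed in normalized image coordinates rather than
+    # pixels. recoverPose then selects, out of the four possible (R, t)
+    # decompositions of E, the one that places the triangulated points in
+    # front of both cameras (the cheirality check).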
+ """ + # Note: u_gcp is of the form [u1, v1, u2, v2] + x1, x2 = u_gcp[:, :2], u_gcp[:, 2:] + t = t0.copy() + + def residuals(t, X_gcp, x1, x2, R, P0): + + t_tmp = t.copy() + P1 = np.column_stack((R, t_tmp)) + + xyz = get_point_estimates(P0, P1, x1, x2) + return xyz.ravel() - X_gcp.ravel() + + res = least_squares(residuals, t, method='lm', args=(X_gcp, x1, x2, R, P0)) + # print(res) + return res.x + + +def calc_final_error(P_1, P_2, shared_uv, observed_points): + + x2_inliers, x3_inliers = shared_uv[:, :2], shared_uv[:, 2:] + + predicted_points = get_point_estimates(P_1, P_2, x2_inliers, x3_inliers) + + rand_idx = np.random.randint(0, len(observed_points)) + print("Predicted point:", predicted_points[rand_idx]) + print("Observed point:", observed_points[rand_idx]) + print("Average distance between predicted and observed:", + np.sum(np.sqrt(np.sum(np.square(predicted_points - observed_points)))) / len(observed_points)) + + +def main(argv): + + if len(argv) != 3: + print("usage: Sequential Structure from Motion: ") + sys.exit(1) + + im1 = Image.open(argv[0]) + + f, sensor_size = get_intrinsic_params(im1) + cu, cv = sensor_size + + im_1 = plt.imread(argv[0]) + im_2 = plt.imread(argv[1]) + im_3 = plt.imread(argv[2]) + + # Create plot + fig = plt.figure() + ax = fig.gca(projection='3d') + + sw = SIFTWrapper(im_1, im_2) + u1, u2_1 = sw.compute_best_matches(0.7) + + # Note that here we are assuming all pictures came from same camera + # (A safe assumption for now, because we KNOW they came from the same camera) + K_cam = intrinsic_cam_mtx(f, cu, cv) + extrinsic_cam, x1_inliers, x2_1_inliers, u1_inliers, u2_1_inliers = get_inliers(u1, u2_1, K_cam) + + # Note that we are using generalized + # camera coordinates, so we do not + # need to multiply by K(For either of P0, P1) + P_0 = np.array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0]]) + + P_1 = extrinsic_cam + + # Estimate points + point_estimates = get_point_estimates(P_0, P_1, x1_inliers, x2_1_inliers) + + # Compute keypoints for image 3 + kp3, des3 = compute_keypoints(im_3) + + # Find good matches b/w images 2 and 3 + # Note that we relax r to allow for more full matches + u2_2, u3 = compute_best_matches(sw.kps[1], sw.descs[1], kp3, des3, r=0.75) + + # Estimate pose(using recovered essential matrix) and get inliers. + # These inliers will be used for point triangulation + extrinsic_cam2, x2_2_inliers, x3_inliers, u2_2_inliers, u3_inliers = get_inliers(u2_2, u3, K_cam) + + P_1_gen = np.row_stack((P_1, np.asarray([0, 0, 0, 1]))) + P_2_prime_gen = np.row_stack((extrinsic_cam2, np.asarray([0, 0, 0, 1]))) + + # Note that the rotation of this pose is correct wrt P1's coordinate system + # the translation is also correct, up to scale. + P_2_init = (P_1_gen @ P_2_prime_gen)[:3] + + # FIXME: Something with get_gcp_mask is probably broken. + # Get a shape error in estimate_translation... + + # Find set of points for which we have 3D estimates + u_mask, three_d_mask = get_gcp_mask(u2_1_inliers, u2_2_inliers) + + X_gcp = [] + + # FIXME: + # X_gcp = point_estimates[three_d_mask] + # u_gcp = np.column_stack((x2_2_inliers[u_mask], x3_inliers[u_mask])) + + # TODO: Could make this faster + shared_points = [] + for x2_2, x3 in zip(x2_2_inliers, x3_inliers): + for i, x2_1 in enumerate(x2_1_inliers): + + if np.allclose(x2_2, x2_1): + + # Use x2_1 inliers b/c they assume that the camera in img 1 is + # the origin. 
+def calc_final_error(P_1, P_2, shared_uv, observed_points):
+
+    x2_inliers, x3_inliers = shared_uv[:, :2], shared_uv[:, 2:]
+
+    predicted_points = get_point_estimates(P_1, P_2, x2_inliers, x3_inliers)
+
+    rand_idx = np.random.randint(0, len(observed_points))
+    print("Predicted point:", predicted_points[rand_idx])
+    print("Observed point:", observed_points[rand_idx])
+    # Mean Euclidean distance between predicted and observed points
+    print("Average distance between predicted and observed:",
+          np.mean(np.linalg.norm(predicted_points - observed_points, axis=1)))
+
+
+def main(argv):
+
+    if len(argv) != 3:
+        print("usage: Sequential Structure from Motion: <image1> <image2> <image3>")
+        sys.exit(1)
+
+    im1 = Image.open(argv[0])
+
+    f, principal_point = get_intrinsic_params(im1)
+    cu, cv = principal_point
+
+    im_1 = plt.imread(argv[0])
+    im_2 = plt.imread(argv[1])
+    im_3 = plt.imread(argv[2])
+
+    # Create the 3d plot
+    fig = plt.figure()
+    ax = fig.add_subplot(projection='3d')
+
+    sw = SIFTWrapper(im_1, im_2)
+    u1, u2_1 = sw.compute_best_matches(0.7)
+
+    # Note that here we are assuming all pictures came from the same camera
+    # (a safe assumption for now, because we KNOW they came from the same camera)
+    K_cam = intrinsic_cam_mtx(f, cu, cv)
+    extrinsic_cam, x1_inliers, x2_1_inliers, u1_inliers, u2_1_inliers = get_inliers(u1, u2_1, K_cam)
+
+    # Note that we are using generalized camera coordinates,
+    # so we do not need to multiply by K (for either of P0, P1)
+    P_0 = np.array([[1, 0, 0, 0],
+                    [0, 1, 0, 0],
+                    [0, 0, 1, 0]])
+
+    P_1 = extrinsic_cam
+
+    # Estimate points
+    point_estimates = get_point_estimates(P_0, P_1, x1_inliers, x2_1_inliers)
+
+    # Compute keypoints for image 3
+    kp3, des3 = compute_keypoints(im_3)
+
+    # Find good matches b/w images 2 and 3
+    # Note that we relax r to allow for more matches
+    u2_2, u3 = compute_best_matches(sw.kps[1], sw.descs[1], kp3, des3, r=0.75)
+
+    # Estimate the pose (using the recovered essential matrix) and get inliers.
+    # These inliers will be used for point triangulation
+    extrinsic_cam2, x2_2_inliers, x3_inliers, u2_2_inliers, u3_inliers = get_inliers(u2_2, u3, K_cam)
+
+    P_1_gen = np.vstack((P_1, np.asarray([0, 0, 0, 1])))
+    P_2_prime_gen = np.vstack((extrinsic_cam2, np.asarray([0, 0, 0, 1])))
+
+    # Note that the rotation of this pose is correct wrt P1's coordinate system;
+    # the translation is also correct, up to scale.
+    P_2_init = (P_1_gen @ P_2_prime_gen)[:3]
+
+    # FIXME: Something with get_gcp_mask is probably broken.
+    # Get a shape error in estimate_translation...
+
+    # Find the set of points for which we already have 3D estimates
+    u_mask, three_d_mask = get_gcp_mask(u2_1_inliers, u2_2_inliers)
+
+    X_gcp = []
+
+    # FIXME:
+    # X_gcp = point_estimates[three_d_mask]
+    # u_gcp = np.column_stack((x2_2_inliers[u_mask], x3_inliers[u_mask]))
+
+    # TODO: Could make this faster
+    shared_points = []
+    for x2_2, x3 in zip(x2_2_inliers, x3_inliers):
+        for i, x2_1 in enumerate(x2_1_inliers):
+
+            if np.allclose(x2_2, x2_1):
+
+                # Use the x2_1 inliers b/c they assume that the camera in img 1 is
+                # the origin.
+                shared_points.append((x2_1, x3))
+                X_gcp.append(point_estimates[i])
+
+    X_gcp = np.array(X_gcp)
+    shared_points = np.array(shared_points).reshape(len(shared_points), 4)
+
+    t0 = P_2_init[:, 3]
+    R = P_2_init[:, :3]
+
+    # Minimize the error between the observed and predicted 3d coordinates.
+    # This essentially fixes the scaling on the translation vector.
+    # Note that we could make this a bit easier / faster by optimizing only the
+    # scale of the translation vector (see the estimate_translation_scale_only sketch above)
+    t_est = estimate_translation(X_gcp, shared_points, t0, R, P_1)
+
+    P_2 = np.column_stack((R, t_est))
+
+    second_pt_estimates = get_point_estimates(P_1, P_2, x2_2_inliers, x3_inliers)
+
+    u_mask_inv = np.invert(u_mask)
+    new_points = second_pt_estimates[u_mask_inv]
+
+    plot_3d(point_estimates, ax, 'r')
+    plot_3d(new_points, ax, 'b')
+    plt.show()
+
+    calc_final_error(P_1, P_2, shared_points, X_gcp)
+
+    num_new = len(new_points)
+    num_all = len(second_pt_estimates)
+    print(num_new, "new points were recovered out of", num_all,
+          "possible (~{0:.0f}%)".format((num_new / num_all) * 100))
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/sword/DSC03950.JPG b/sword/DSC03950.JPG
new file mode 100644
index 0000000..1417cfd
Binary files /dev/null and b/sword/DSC03950.JPG differ
diff --git a/sword/DSC03951.JPG b/sword/DSC03951.JPG
new file mode 100644
index 0000000..9f5efe0
Binary files /dev/null and b/sword/DSC03951.JPG differ
diff --git a/sword/DSC03952.JPG b/sword/DSC03952.JPG
new file mode 100644
index 0000000..eebb614
Binary files /dev/null and b/sword/DSC03952.JPG differ
diff --git a/sword/DSC03953.JPG b/sword/DSC03953.JPG
new file mode 100644
index 0000000..6fe4a9a
Binary files /dev/null and b/sword/DSC03953.JPG differ
diff --git a/sword/DSC03954.JPG b/sword/DSC03954.JPG
new file mode 100644
index 0000000..01253f4
Binary files /dev/null and b/sword/DSC03954.JPG differ
diff --git a/sword/DSC03955.JPG b/sword/DSC03955.JPG
new file mode 100644
index 0000000..a3f733f
Binary files /dev/null and b/sword/DSC03955.JPG differ
diff --git a/sword/DSC03956.JPG b/sword/DSC03956.JPG
new file mode 100644
index 0000000..a642063
Binary files /dev/null and b/sword/DSC03956.JPG differ
diff --git a/sword/DSC03957.JPG b/sword/DSC03957.JPG
new file mode 100644
index 0000000..35ffa07
Binary files /dev/null and b/sword/DSC03957.JPG differ
diff --git a/sword/DSC03958.JPG b/sword/DSC03958.JPG
new file mode 100644
index 0000000..b5a814f
Binary files /dev/null and b/sword/DSC03958.JPG differ