diff --git a/.ipynb_checkpoints/project_description-checkpoint.ipynb b/.ipynb_checkpoints/project_description-checkpoint.ipynb new file mode 100644 index 0000000..0e60103 --- /dev/null +++ b/.ipynb_checkpoints/project_description-checkpoint.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Project 2: Image Stitcher\n", + "## Assigned: 02.01.2019\n", + "## Due Date: TBD (probably 02.20.2019)\n", + "\n", + "Panoramic photography is ubiquitous, with nearly every digital camera having a mode dedicated to it. Here's an example from the Italian Alps:\n", + "\n", + "Note the extreme aspect ratio: much larger than the 4:3 or 3:2 that is typical of most cameras; suffice it to say, the camera that took this picture did not have a sensor that was this wide. So how are these things made? Stated simply, multiple images are taken, mutually identifiable points are located in each of these images, and the images are warped such that these points are coincident. The matching stage might look like this:\n", + "\n", + "\n", + "For this project, you will code your own image stitcher from scratch. Despite the conceptual simplicity of this operation, there are a surprising number of challenges that need to be addressed. A general framework for a stitcher might look like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from harris_response import *\n", + "\n", + "\n", + "class Stitcher(object):\n", + " def __init__(self, image_1, image_2):\n", + " \n", + " # Convert both images to gray scale\n", + " image_1 = np.mean(image_1, -1)\n", + " image_2 = np.mean(image_2, -1)\n", + " \n", + " self.images = [image_1, image_2]\n", + "\n", + " def find_keypoints(self, image, n_keypoints):\n", + " \n", + " \"\"\"\n", + " Step 1: This method locates features that are \"good\" for matching. 
To do this we will implement the Harris \n", + " corner detector\n", + " \"\"\"\n", + " \n", + " filter_size = 5\n", + " \n", + " # Setup gauss filter\n", + " gauss_filter = Filter.make_gauss((filter_size, filter_size), 2)\n", + " \n", + " # Compute smoothed harris response\n", + " out = convolve(compute_harris_response(image, gauss_filter), gauss_filter) # Smooth results\n", + " \n", + " # Find some good features to match\n", + " x, y = adaptive_non_maximal_suppression(out, n_keypoints, filter_size)\n", + " \n", + " # Return the locations\n", + " return x, y\n", + " \n", + " def generate_descriptors(self):\n", + " \"\"\"\n", + " Step 2: After identifying relevant keypoints, we need to come up with a quantitative description of the \n", + " neighborhood of that keypoint, so that we can match it to keypoints in other images.\n", + " \"\"\"\n", + " im1_kpt_x, im1_kpt_y = self.find_keypoints(self.images[0], 100) \n", + " im2_kpt_x, im2_kpt_y = self.find_keypoints(self.images[1], 100) \n", + "\n", + " ofs = l // 2\n", + " im1_desc_out = []\n", + " im1_x_out = []\n", + " im1_y_out = []\n", + " im2_desc_out = []\n", + " im2_x_out = []\n", + " im2_y_out = []\n", + " \n", + " # check for u and v to be same dimensions\n", + " for i in range(len(im1_kpt_x)):\n", + " sub = self.images[0][im1_kpt_x[i]-ofs:im1_kpt_x[i]+ofs+1, im1_kpt_y[i]-ofs:im1_kpt_y[i]+ofs+1] \n", + " if sub.shape[0] == l and sub.shape[1] == l: \n", + " im1_x_out.append(im1_kpt_x[i])\n", + " im1_y_out.append(im1_kpt_y[i])\n", + " im1_desc_out.append(sub)\n", + " \n", + " for i in range(len(im2_kpt_x)):\n", + " sub2 = self.images[1][im2_kpt_x[i]-ofs:im2_kpt_x[i]+ofs+1, im2_kpt_y[i]-ofs:im2_kpt_y[i]+ofs+1]\n", + " if sub2.shape[0] == l and sub2.shape[1] == l:\n", + " im2_x_out.append(im2_kpt_x[i])\n", + " im2_y_out.append(im2_kpt_y[i])\n", + " im2_desc_out.append(sub2)\n", + " \n", + " return np.stack(im1_desc_out), np.asarray(im1_y_out), np.asarray(im1_x_out), np.stack(im2_desc_out), np.asarray(im2_y_out), np.asarray(im2_x_out)\n", + " \n", + " \n", + " def match_keypoints(self,r= 0.7):\n", + " \"\"\"\n", + " Step 3: Compare keypoint descriptions between images, identify potential matches, and filter likely\n", + " mismatches\n", + " \"\"\"\n", + " im1_desc , im1_y ,im1_x ,im2_desc, im2_y ,im2_x = self.generate_descriptors()\n", + " \n", + " match_out = []\n", + " for index1 , D1 in enumerate (im1_desc):\n", + " smallest = np.inf\n", + " temp_index2 = 0\n", + " D1_hat = (D1 - np.mean(D1)) / np.std(D1)\n", + " \n", + " for index2, D2 in enumerate (im2_desc):\n", + " D2_hat = (D2 - np.mean(D2)) / np.std(D2)\n", + " E =np.sum(np.square(D1_hat-D2_hat))\n", + " if E < smallest:\n", + " smallest = E\n", + " temp_index2 = index2\n", + " match_out.append((index1 , temp_index2))\n", + " # delete elemntes from match_out < r \n", + " return np.asarray(match_out)\n", + " \n", + " def find_homography(self):\n", + " \"\"\"\n", + " Step 4: Find a linear transformation (of various complexities) that maps pixels from the second image to \n", + " pixels in the first image\n", + " \"\"\"\n", + " \n", + " def stitch(self):\n", + " \"\"\"\n", + " Step 5: Transform second image into local coordinate system of first image, and (perhaps) perform blending\n", + " to avoid obvious seams between images.\n", + " \"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will populate these functions over the next several weeks, a process that will involve delving into some of the most elementary operations in digital signal 
processing. \n", + "\n", + "As a test case, apply your stitcher to at least four overlapping images that you've taken. With a stitcher that works on two images, more images can be added by applying the method recursively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Filter.py b/Filter.py new file mode 100644 index 0000000..04fb325 --- /dev/null +++ b/Filter.py @@ -0,0 +1,52 @@ +import numpy as np + + +class Filter: + + @staticmethod + def make_average(shape): + """Creates average filter with given shape""" + + # Make sure filter is only 2D and square + assert len(shape) == 2 + assert shape[0] == shape[1] + + return np.ones(shape) * 1 / np.prod(shape) + + @staticmethod + def make_gauss(shape, sigma=1): + """Creates a Gaußian filter with given shape and standard deviation""" + + # Make sure filter is only 2D and square + assert len(shape) == 2 + assert shape[0] == shape[1] + + range_end = shape[0] // 2 + + xs = np.arange(-range_end, range_end + 1, 1) + ys = np.arange(-range_end, range_end + 1, 1) + + xx, yy = np.meshgrid(xs, ys, indexing='xy') + + # Compute gaussian + g_filter = np.exp(-((xx ** 2 + yy ** 2) / (2 * sigma ** 2))) + + # Normalize + g_filter /= np.sum(g_filter) + + return g_filter + + @staticmethod + def make_sobel_u(): + """Creates a Sobel filter for approximating the discrete derivative of the image wrt u (aka x)""" + + return np.array([[-1, 0, 1], + [-2, 0, 2], + [-1, 0, 1]]) + + @staticmethod + def make_sobel_v(): + """Creates a Sobel filter for approximating the discrete derivative of the image wrt v (aka y)""" + return np.array([[-1, -2, -1], + [ 0, 0, 0], + [ 1, 2, 1]]) diff --git a/Stitcher.py b/Stitcher.py new file mode 100644 index 0000000..7e6523b --- /dev/null +++ b/Stitcher.py @@ -0,0 +1,228 @@ +from harris_response import * +import skimage.transform as skt + + +class Stitcher(object): + + def __init__(self, image_1, image_2): + + self.images = [image_1, image_2] + + def find_keypoints(self, image, n_keypoints): + + """ + Step 1: This method locates features that are "good" for matching. To do this we will implement the Harris + corner detector + """ + + filter_shape = (5, 5) + + # Compute smoothed harris response + out = convolve(compute_harris_response(image, filter_shape), + Filter.make_gauss(filter_shape, 2)) # Smooth results + + # Find some good features to match + # form: [u, v] + x, y = anms_fast(h=out, n=n_keypoints) + + # Return the locations + # form: [u, v] + return x, y + + def generate_descriptors(self, img, l=21): + """ + Step 2: After identifying relevant keypoints, we need to come up with a quantitative description of the + neighborhood of that keypoint, so that we can match it to keypoints in other images. 
+ """ + u, v = self.find_keypoints(img, 100) + + ofs = l // 2 + + d_out = [] + u_out = [] + v_out = [] + + m = len(img) + n = len(img[0]) + + # check for u and v to be same dimensions + for i in range(len(u)): + + c_x = v[i] + c_y = u[i] + + # If we cannot get a description for key point, throw it out + if c_x + ofs > m or c_x - ofs < 0 or c_y + ofs > n or c_y - ofs < 0: + continue + + sub = img[v[i] - ofs: v[i] + ofs + 1, u[i] - ofs: u[i] + ofs + 1] + if sub.shape[0] == l and sub.shape[1] == l: + u_out.append(u[i]) + v_out.append(v[i]) + d_out.append(sub) + + return np.stack(d_out), np.asarray(u_out, dtype=int), np.asarray(v_out, dtype=int) + + def D_hat(self, d): + return (d - d.mean()) / np.std(d) + + def error(self, d1, d2): + return np.sum((d1 - d2) ** 2) + + def match_keypoints(self, r=0.7): + """ + Step 3: Compare keypoint descriptions between images, identify potential matches, and filter likely + mismatches + """ + + d1, u1, v1 = self.generate_descriptors(self.images[0], 0) + d2, u2, v2 = self.generate_descriptors(self.images[1], 1) + + match_out = [] + value_list = [] + for i, D1 in enumerate(d1): + + smallest = np.inf + index2_smallest = 0 + smallest2 = np.inf + index2_smallest2 = 0 + D1_hat = (D1 - np.mean(D1)) / np.std(D1) + value = 0 + value2 = 0 + + for j, D2 in enumerate(d2): + D2_hat = (D2 - np.mean(D2)) / np.std(D2) + E = np.sum(np.square(D1_hat - D2_hat)) + if E < smallest: # best match + smallest = E + value = E + index2_smallest = j + np.delete(d1, index2_smallest, 0) + np.delete(d2, index2_smallest, 0) + + for j, D2 in enumerate(d2): + + D2_hat = (D2 - np.mean(D2)) / np.std(D2) + E = np.sum(np.square(D1_hat - D2_hat)) + + if E < smallest2: # the second best match + smallest2 = E + value2 = E + index2_smallest = j + + if value < (r * value2): + match_out.append((u1[i], v1[i], u2[index2_smallest], v2[index2_smallest])) + value_list.append(value) + + return np.asarray(match_out) + + def find_homography(self, uv, uv2): + """ + Step 4: Find a linear transformation (of various complexities) that maps pixels from the second image to + pixels in the first image + """ + + if uv.shape != uv2.shape: + raise ValueError("X and X_prime must have matching shapes") + if uv.shape[0] < 4: + raise ValueError("Not enough points") + + # matches = np.column_stack(uv, uv2) + + A = np.zeros((2 * len(uv), 9)) + + for i in range(len(uv)): + A[2 * i, :] = [0, 0, 0, -uv[i, 0], -uv[i, 1], -1, uv2[i, 1] * uv[i, 0], uv2[i, 1] * uv[i, 1], uv2[i, 1]] + A[2 * i + 1, :] = [uv[i, 0], uv[i, 1], 1, 0, 0, 0, -uv2[i, 0] * uv[i, 0], -uv2[i, 0] * uv[i, 1], -uv2[i, 0]] + + # print(A) + U, Sigma, Vt = np.linalg.svd(A) + + H = Vt[-1, :].reshape((3, 3)) + H /= H[2, 2] + + return H + + def RANSAC(self, number_of_iterations=10, n=10, r=3, d=8): + + H_best = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + num_inliers = 0 + + matches = self.match_keypoints() # matches should be of the form [u1, v1, u2, v2] + + for i in range(number_of_iterations): + # 1. Select a random sample of length n from the matches + np.random.shuffle(matches) + sub = matches[0:n, :] + test = matches[n:, :] + + # 2. Compute a homography based on these points using the methods given above + H = self.find_homography(sub[:, 0:2], sub[:, 2:]) + + # 3. Apply this homography to the remaining points that were not randomly selected + test_p = test[:, 0:2] + test_p = np.column_stack((test_p, np.ones(len(test_p)))) + uv_p = (H @ test_p.T).T + test_u = uv_p[:, 0] / uv_p[:, 2] + test_v = uv_p[:, 1] / uv_p[:, 2] + + # 4. 
Compute the residual between observed and predicted feature locations + R = np.zeros_like(test_u) + for i in range(len(test_p)): + R[i] = np.sqrt((test_u[i] - test[i, 2]) ** 2 + (test_v[i] - test[i, 3]) ** 2) + + # 5. Flag predictions that lie within a predefined distance r from observations as inliers + inl = np.zeros_like(R) + for i in range(len(inl)): + if R[i] < r: + inl[i] = 1 + else: + inl[i] = 0 + num_inl = np.sum(inl) + + # 6. If number of inliers is greater than the previous best + # and greater than a minimum number of inliers d, + # 7. update H_best + # 8. update list_of_inliers + if num_inl > num_inliers: + if num_inl > d: + H_best = H + num_inliers = num_inl + + return H_best, num_inliers + + def stitch(self): + """ + Step 5: Transform second image into local coordinate system of first image, and (perhaps) perform blending + to avoid obvious seams between images. + """ + H_best = self.RANSAC(10, 10, 3, 8) + + im1 = self.images[0] + im2 = self.images[1] + + transform = skt.ProjectiveTransform(H_best) + im_2_warped = skt.warp(im2, transform, output_shape=(im1.shape[0], im1.shape[1] + (int(im1.shape[1] * 0.4)))) + + im1t = np.zeros_like(im_2_warped) + + for v in range(im1.shape[0]): + for u in range(im1.shape[1]): + if im1[v, u] != 0: + im1t[v, u] = im1[v, u] + + img_out = np.zeros_like(im_2_warped) + + for v in range(img_out.shape[0]): + for u in range(img_out.shape[1]): + if im1t[v, u] == 0 and im_2_warped[v, u] == 0: + img_out[v, u] = 0 + + elif im1t[v, u] != 0 and im_2_warped[v, u] == 0: + img_out[v, u] = im1[v, u] + elif im1t[v, u] == 0. and im_2_warped[v, u] != 0: + img_out[v, u] = im_2_warped[v, u] + else: + img_out[v, u] = (im_2_warped[v, u] + im1[v, u]) / 2 + + return img_out diff --git a/convolution.py b/convolution.py new file mode 100644 index 0000000..083a423 --- /dev/null +++ b/convolution.py @@ -0,0 +1,92 @@ +import numpy as np +import matplotlib.pyplot as plt + + +def convolve(g, h): + + """ + Convolves g with h, returns valid portion + :param g: image + :param h: filter + :return: result of convolution, shape=( max( len(g), len(h) ), max( len(g[0]), len(h[0]) ) ) + result of convolution, shape=( max( len(g), len(h) ) - min( len(g), len(h) ) + 1, + max( len(g[0]), len(h[0]) ) - min( len(g[0]), len(h[0]) ) + 1 ) + """ + + out_m = max(len(g), len(h)) - min(len(g), len(h)) + 1 + out_n = max(len(g[0]), len(h[0])) - min(len(g[0]), len(h[0])) + 1 + + out_array = np.zeros(shape=(out_m, out_n)) + + # Iterate over entire output image + for u in range(len(out_array)): + for v in range(len(out_array[u])): + # One-liner to do convolution + out_array[u][v] = np.sum(np.multiply(h, g[u: u + len(h), v: v + len(h[0])])) + + return out_array + + +def _pad_input(pad_x, pad_y, g, padding_scheme='fill', fill_val=0): + + if padding_scheme == 'fill': + + v_buff = np.zeros(shape=(pad_x, len(g[0]))) + fill_val + g = np.vstack((v_buff, g, v_buff)) + + h_buff = np.zeros(shape=(len(g), pad_y)) + fill_val + g = np.hstack((h_buff, g, h_buff)) + + elif padding_scheme == 'nn': + + v_buff = np.tile(np.zeros(shape=(1, len(g[0]))) + g[0], (pad_x, 1)) + g = np.vstack((v_buff, g, v_buff)) + + # Have to do some transposition to get column vectors + h_buff = np.tile((np.zeros(shape=(len(g), 1)).T + g[:, 0]).T, (1, pad_y)) + g = np.hstack((h_buff, g, h_buff)) + + # TODO: fill in diagonals? 
+ + elif padding_scheme == 'mirror': + raise NotImplementedError("Padding scheme {0} is not yet implemented".format(padding_scheme)) + + return g + + +def non_linear_convolve(g, filter_size): + + # Calculate output shape + out_m = max(len(g), filter_size) + out_n = max(len(g[0]), filter_size) + + # Calculate pad size + pad_x = filter_size - 1 + pad_y = filter_size - 1 + + g = _pad_input(pad_x, pad_y, g) + + out_array = np.empty(shape=(out_m, out_n), dtype=bool) + + # Iterate over entire output image + for u in range(len(out_array)): + for v in range(len(out_array[u])): + # Calculate max in neighborhood + out_array[u][v] = np.equal(g[u + pad_x, v + pad_y], np.max(g[u: u + filter_size, v: v + filter_size])) + + return out_array + + +def img_convolution(img_fname, img_filter): + + img_color = plt.imread(img_fname) + img_gray = img_color.mean(axis=2) # Convert to greyscale + + out = convolve(g=img_gray, h=img_filter) + + # Plot + plt.imshow(img_gray, cmap='gray') + plt.show() + + plt.imshow(out, cmap='gray') + plt.show() diff --git a/harris_response.py b/harris_response.py new file mode 100644 index 0000000..e5ee286 --- /dev/null +++ b/harris_response.py @@ -0,0 +1,257 @@ +from convolution import * +from Filter import * +import numba + + +def compute_harris_response(img, filter_shape): + + gauss = Filter.make_gauss(shape=filter_shape, sigma=2) + img = convolve(img, gauss) + + # NOTE: (*) = convolution + + # Make respective Sobel filters for computing gradients + S_u = Filter.make_sobel_u() + S_v = Filter.make_sobel_v() + + # Compute gradient of image with respect to u = I_u + di_du = convolve(img, S_u) + + # Compute gradient of image with respect to v = I_v + di_dv = convolve(img, S_v) + + # Compute squares of gradient + # I_uu = w (*) np.multiply(I_u, I_u) + di_du_sq = convolve(di_du ** 2, gauss) + di_dv_sq = convolve(di_dv ** 2, gauss) + + # Compute product of gradients + di_du_di_dv = convolve(np.multiply(di_du, di_dv), gauss) + + # Finally, compute the harris response + # Note: we add the 1e-16 to prevent division by 0. + # 1.0 + 1e-16 = 1.0, so we do not have to worry about screwing up division + return np.divide((np.multiply(di_du_sq, di_dv_sq) - di_du_di_dv ** 2), (di_du_sq + di_dv_sq) + 1e-16) + + +def non_maximal_suppression(h, n=100, max_filter_size=3): + """Returns two vectors, one for x point location, one for y point location. + These points correspond to local maxima point locations""" + + local_max_pts = local_max_loc_and_intensity(h) + + # Filter out local maxima with relatively weak responses + local_max_pts = local_max_pts[local_max_pts[:, 2] > 1e-5] + + # Sort by intensity and reverse + local_max_pts = np.flip(local_max_pts[local_max_pts[:, 2].argsort(kind='mergesort')], axis=0) + + # Keep best n + return local_max_pts[:n, 1], local_max_pts[:n, 0] + + +def local_max_loc_and_intensity(h): + + """ + Helper function to compute locations of local maxima + :param h: harris repsonse + :param max_filter_size: size of filter window to be used + :return: matrix that is shape of number of local maxima. 
+ First column is i point location, second column is j point location + Last column is value of harris response at that location + """ + bool_mask = non_linear_convolve(h, 3) + + # Get list of points where mask is true + i, j = np.where(bool_mask == True) + + # Grab the corresponding intensities + intensities = h[i, j] + + # (n,) -> (n, 1) + i = np.expand_dims(i, axis=1) + j = np.expand_dims(j, axis=1) + intensities = np.expand_dims(intensities, axis=1) + + # form: [v, u, intensities] + return np.hstack((i, j, intensities)) + + +@numba.jit(nopython=True) +def point_distance_sq(p1, p2): + """Computes squared euclidean distance between p1 and p2""" + + diff_x = (float(p1[0]) - float(p2[0])) + diff_y = (float(p1[1]) - float(p2[1])) + + return (diff_x * diff_x) + (diff_y * diff_y) + + +def adaptive_non_maximal_suppression(h, n, fname=None, c=0.9): + + # pt = point + # curr = current + # loc = location + # idx = index + # dist = distance + # num = number + # pot = potential + # kpts = key points + + print('running ANMS on', fname) + + max_loc_and_intensity = pre_process_loc_and_intensity(local_max_loc_and_intensity(h)) + + local_max_pts_loc = max_loc_and_intensity[:, 0:2] + local_max_pts_intensity = max_loc_and_intensity[:, 2] + + num_pot_kpts = len(local_max_pts_loc) + + # this list has index in the closest point w/ higher harris response + closest_pt_loc = np.empty(shape=(num_pot_kpts, 2), dtype=np.int) + closest_pt_dist = np.empty(shape=(num_pot_kpts, 1)) + closest_pt_dist.fill(np.inf) + + # for each point, find closest point w/ higher harris response + for i in range(num_pot_kpts): + for j in range(num_pot_kpts): + + # Don't want to compare point to itself + if i == j: + continue + + # Only compute distance if response is stronger(thanks to Dr. Douglas Brinkerhoff) + elif local_max_pts_intensity[j] * c > local_max_pts_intensity[i]: + + new_dist = point_distance_sq(local_max_pts_loc[i], local_max_pts_loc[j]) + + # If new point has higher intensity and is closer, record it + if new_dist < closest_pt_dist[i]: + closest_pt_loc[i] = local_max_pts_loc[j] + closest_pt_dist[i] = new_dist + + # If dist is still infinity, it is the 'best' local max + cond = closest_pt_dist[:, 0] == np.inf + closest_pt_loc[cond] = local_max_pts_loc[cond] + + # Create matrix of [point_location_j, point_location_i, dist to closest point] + pt_dist_mtx = np.hstack((closest_pt_loc, closest_pt_dist)) + + pt_dist_mtx = np.flipud(pt_dist_mtx[pt_dist_mtx[:, 2].argsort(kind='mergesort')]) + + points_to_keep = set() + + for i in range(len(pt_dist_mtx)): + + current_point = (int(pt_dist_mtx[i, 1]), int(pt_dist_mtx[i, 0])) + + if current_point not in points_to_keep: + points_to_keep.add(current_point) + + if len(points_to_keep) == n: + break + + print('ANMS finished', fname) + + # Save these points, b/c this process is very slow + if fname: + pts_to_keep_mtx = np.asarray(list(points_to_keep)) + np.save(fname, pts_to_keep_mtx) + + return zip(*points_to_keep) + + +@numba.jit(nopython=True, parallel=True) +def anms_helper(local_max_pts_loc, local_max_pts_intensity, c): + + num_pot_kpts = len(local_max_pts_loc) + + # this list has index in the closest point w/ higher harris response + closest_pt_loc = np.zeros(shape=(num_pot_kpts, 2), dtype=np.int64) + closest_pt_dist = np.zeros(shape=(num_pot_kpts, 1)) + closest_pt_dist.fill(np.inf) + + # for each point, find closest point w/ higher harris response + for i in range(num_pot_kpts): + for j in range(num_pot_kpts): + + # Don't want to compare point to itself + if i == j: + continue + + 
# Only compute distance if response is stronger(thanks to Dr. Douglas Brinkerhoff) + elif local_max_pts_intensity[j] * c > local_max_pts_intensity[i]: + + new_dist = point_distance_sq(local_max_pts_loc[i], local_max_pts_loc[j]) + + # If new point has higher intensity and is closer, record it + if new_dist < closest_pt_dist[i, 0]: + closest_pt_loc[i] = local_max_pts_loc[i] + closest_pt_dist[i] = new_dist + + return closest_pt_loc, closest_pt_dist + + +def pre_process_loc_and_intensity(max_loc_and_intensity): + + # Filter out local maxima with relatively weak responses + max_loc_and_intensity = max_loc_and_intensity[max_loc_and_intensity[:, 2] > 1e-3] + + # Sort by strength of harris response + # Note: argsort sorts in ascending order(ie: -2, -1, 0, 1, 2, ..., inf) + # but we want descending order, so we flip to reverse row order. + max_loc_and_intensity = np.flipud(max_loc_and_intensity[max_loc_and_intensity[:, 2].argsort(kind='mergesort')]) + + # Keep top 50%(this could be converted to a percentage, + # just being lazy.(could also pick number / percentage of points to throw + # out and then do so by randomly sampling from indices, this has + # added benefit of [theoretically] keeping the same spatial + # distribution as the original points(thanks to Dr. Douglas Brinkerhoff)) + num_to_keep = len(max_loc_and_intensity) // 2 + + return max_loc_and_intensity[:num_to_keep, :] + + +def anms_fast(h, n, c=0.9): + + # pt = point + # curr = current + # loc = location + # idx = index + # dist = distance + # num = number + # pot = potential + # kpts = key points + + # Compute and pre-process local maxima + # form: [v, u, intensities] + max_loc_and_intensity = pre_process_loc_and_intensity(local_max_loc_and_intensity(h)) + + local_max_pts_loc = max_loc_and_intensity[:, 0:2] + local_max_pts_intensity = max_loc_and_intensity[:, 2] + + closest_pt_loc, closest_pt_dist = anms_helper(local_max_pts_loc, local_max_pts_intensity, c) + + # If dist is still infinity, it is the 'best' local max + cond = closest_pt_dist[:, 0] == np.inf + closest_pt_loc[cond] = local_max_pts_loc[cond] + + # Create matrix of [point_location_j, point_location_i, dist to closest point] + pt_dist_mtx = np.hstack((closest_pt_loc, closest_pt_dist)) + + pt_dist_mtx = np.flipud(pt_dist_mtx[pt_dist_mtx[:, 2].argsort(kind='mergesort')]) + + points_to_keep = set() + + for i in range(len(pt_dist_mtx)): + + current_point = (int(pt_dist_mtx[i, 1]), int(pt_dist_mtx[i, 0])) + + if current_point not in points_to_keep: + points_to_keep.add(current_point) + + if len(points_to_keep) == n: + break + + # form: [u, v, intensities] + return zip(*points_to_keep) diff --git a/p1.jpg b/p1.jpg new file mode 100644 index 0000000..e41dda6 Binary files /dev/null and b/p1.jpg differ diff --git a/p2.jpg b/p2.jpg new file mode 100644 index 0000000..6cd56fd Binary files /dev/null and b/p2.jpg differ diff --git a/project_description.ipynb b/project_description.ipynb index 421c6d0..169b710 100644 --- a/project_description.ipynb +++ b/project_description.ipynb @@ -18,43 +18,243 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "from harris_response import *\n", + "import skimage.transform as skt\n", + "\n", + "\n", "class Stitcher(object):\n", - " def __init__(self,image_1,image_2):\n", - " self.images = [image_1,image_2]\n", - " \n", - " def find_keypoints(self):\n", + "\n", + " def __init__(self, image_1, image_2):\n", + "\n", + " # Convert both images to gray scale\n", + " image_1 = 
np.mean(image_1, -1)\n", + " image_2 = np.mean(image_2, -1)\n", + "\n", + " self.images = [image_1, image_2]\n", + "\n", + " def find_keypoints(self, image, n_keypoints):\n", + "\n", " \"\"\"\n", - " Step 1: This method locates features that are \"good\" for matching. To do this we will implement the Harris \n", + " Step 1: This method locates features that are \"good\" for matching. To do this we will implement the Harris\n", " corner detector\n", " \"\"\"\n", - " \n", - " def generate_descriptors(self):\n", + "\n", + " filter_size = 5\n", + "\n", + " # Setup gauss filter\n", + " gauss_filter = Filter.make_gauss((filter_size, filter_size), 2)\n", + "\n", + " # Compute smoothed harris response\n", + " out = convolve(compute_harris_response(image, gauss_filter), gauss_filter) # Smooth results\n", + "\n", + " # Find some good features to match\n", + " x, y = adaptive_non_maximal_suppression(out, n_keypoints, filter_size)\n", + "\n", + " # Return the locations\n", + " return x, y\n", + "\n", + " def generate_descriptors(self, img, l=21):\n", " \"\"\"\n", - " Step 2: After identifying relevant keypoints, we need to come up with a quantitative description of the \n", + " Step 2: After identifying relevant keypoints, we need to come up with a quantitative description of the\n", " neighborhood of that keypoint, so that we can match it to keypoints in other images.\n", " \"\"\"\n", - " \n", - " def match_keypoints(self):\n", + " v, u = self.find_keypoints(img, 100)\n", + "\n", + " ofs = l // 2\n", + "\n", + " d_out = []\n", + " u_out = []\n", + " v_out = []\n", + "\n", + " m = len(img)\n", + " n = len(img[0])\n", + "\n", + " # check for u and v to be same dimensions\n", + " for i in range(len(u)):\n", + "\n", + " c_x = v[i]\n", + " c_y = u[i]\n", + "\n", + " # If we cannot get a description for key point, throw it out\n", + " if c_x + ofs > m or c_x - ofs < 0 or c_y + ofs > n or c_y - ofs < 0:\n", + " continue\n", + "\n", + " sub = img[v[i] - ofs: v[i] + ofs + 1, u[i] - ofs: u[i] + ofs + 1]\n", + " if sub.shape[0] == l and sub.shape[1] == l:\n", + " u_out.append(u[i])\n", + " v_out.append(v[i])\n", + " d_out.append(sub)\n", + "\n", + " return np.stack(d_out), np.asarray(u_out, dtype=int), np.asarray(v_out, dtype=int)\n", + "\n", + " def D_hat(self, d):\n", + " return (d - d.mean()) / np.std(d)\n", + "\n", + " def error(self, d1, d2):\n", + " return np.sum((d1 - d2) ** 2)\n", + "\n", + " def match_keypoints(self, r=0.7):\n", " \"\"\"\n", " Step 3: Compare keypoint descriptions between images, identify potential matches, and filter likely\n", " mismatches\n", " \"\"\"\n", + "\n", + " d1, u1, v1 = self.generate_descriptors(self.images[0], 0)\n", + " d2, u2, v2 = self.generate_descriptors(self.images[1], 1)\n", " \n", - " def find_homography(self):\n", + " match_out = []\n", + " value_list = []\n", + " for i, D1 in enumerate(d1):\n", + " \n", + " smallest = np.inf\n", + " index2_smallest = 0\n", + " smallest2= np.inf\n", + " index2_smallest2 = 0\n", + " D1_hat = (D1 - np.mean(D1)) / np.std(D1)\n", + " value = 0\n", + " value2 = 0\n", + " \n", + " for j, D2 in enumerate(d2):\n", + " D2_hat = (D2 - np.mean(D2)) / np.std(D2)\n", + " E = np.sum(np.square(D1_hat-D2_hat))\n", + " if E < smallest: # best match \n", + " smallest = E\n", + " value = E\n", + " index2_smallest = j\n", + " np.delete(d1, index2_smallest, 0) \n", + " np.delete(d2, index2_smallest, 0)\n", + " \n", + " for j, D2 in enumerate(d2):\n", + " \n", + " D2_hat = (D2 - np.mean(D2)) / np.std(D2)\n", + " E = 
np.sum(np.square(D1_hat-D2_hat))\n", + " \n", + " if E < smallest2: # the second best match \n", + " smallest2 = E\n", + " value2= E\n", + " index2_smallest = j\n", + " \n", + " if value < (r * value2):\n", + " \n", + " match_out.append((u1[i], v1[i], u2[index2_smallest], v2[index2_smallest]))\n", + " value_list.append(value)\n", + " \n", + " return np.asarray(match_out)\n", + "\n", + " def find_homography(self, uv, uv2):\n", " \"\"\"\n", - " Step 4: Find a linear transformation (of various complexities) that maps pixels from the second image to \n", + " Step 4: Find a linear transformation (of various complexities) that maps pixels from the second image to\n", " pixels in the first image\n", " \"\"\"\n", - " \n", + "\n", + " if uv.shape != uv2.shape:\n", + " raise ValueError(\"X and X_prime must have matching shapes\")\n", + " if uv.shape[0] < 4:\n", + " raise ValueError(\"Not enough points\")\n", + "\n", + " # matches = np.column_stack(uv, uv2)\n", + "\n", + " A = np.zeros((2 * len(uv), 9))\n", + "\n", + " for i in range(len(uv)):\n", + " A[2 * i, :] = [0, 0, 0, -uv[i, 0], -uv[i, 1], -1, uv2[i, 1] * uv[i, 0], uv2[i, 1] * uv[i, 1], uv2[i, 1]]\n", + " A[2 * i + 1, :] = [uv[i, 0], uv[i, 1], 1, 0, 0, 0, -uv2[i, 0] * uv[i, 0], -uv2[i, 0] * uv[i, 1], -uv2[i, 0]]\n", + "\n", + " # print(A)\n", + " U, Sigma, Vt = np.linalg.svd(A)\n", + "\n", + " H = Vt[-1, :].reshape((3, 3))\n", + " H /= H[2, 2]\n", + "\n", + " return H\n", + "\n", + " def RANSAC(self, number_of_iterations=10, n=10, r=3, d=8):\n", + "\n", + " H_best = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])\n", + " num_inliers = 0\n", + "\n", + " matches = self.match_keypoints() # matches should be of the form [u1, v1, u2, v2]\n", + "\n", + " for i in range(number_of_iterations):\n", + " # 1. Select a random sample of length n from the matches\n", + " np.random.shuffle(matches)\n", + " sub = matches[0:n, :]\n", + " test = matches[n:, :]\n", + "\n", + " # 2. Compute a homography based on these points using the methods given above\n", + " H = self.find_homography(sub[:, 0:2], sub[:, 2:])\n", + "\n", + " # 3. Apply this homography to the remaining points that were not randomly selected\n", + " test_p = test[:, 0:2]\n", + " test_p = np.column_stack((test_p, np.ones(len(test_p))))\n", + " uv_p = (H @ test_p.T).T\n", + " test_u = uv_p[:, 0] / uv_p[:, 2]\n", + " test_v = uv_p[:, 1] / uv_p[:, 2]\n", + "\n", + " # 4. Compute the residual between observed and predicted feature locations\n", + " R = np.zeros_like(test_u)\n", + " for i in range(len(test_p)):\n", + " R[i] = np.sqrt((test_u[i] - test[i, 2]) ** 2 + (test_v[i] - test[i, 3]) ** 2)\n", + "\n", + " # 5. Flag predictions that lie within a predefined distance r from observations as inliers\n", + " inl = np.zeros_like(R)\n", + " for i in range(len(inl)):\n", + " if R[i] < r:\n", + " inl[i] = 1\n", + " else:\n", + " inl[i] = 0\n", + " num_inl = np.sum(inl)\n", + "\n", + " # 6. If number of inliers is greater than the previous best\n", + " # and greater than a minimum number of inliers d,\n", + " # 7. update H_best\n", + " # 8. 
update list_of_inliers\n", + " if num_inl > num_inliers:\n", + " if num_inl > d:\n", + " H_best = H\n", + " num_inliers = num_inl\n", + "\n", + " return H_best, num_inliers\n", + "\n", " def stitch(self):\n", " \"\"\"\n", " Step 5: Transform second image into local coordinate system of first image, and (perhaps) perform blending\n", " to avoid obvious seams between images.\n", - " \"\"\"" + " \"\"\"\n", + " H_best = self.RANSAC(10, 10, 3, 8)\n", + "\n", + " im1 = self.images[0]\n", + " im2 = self.images[1]\n", + "\n", + " transform = skt.ProjectiveTransform(H_best)\n", + " im_2_warped = skt.warp(im2, transform, output_shape=(im1.shape[0], im1.shape[1] + (int(im1.shape[1] * 0.4))))\n", + "\n", + " im1t = np.zeros_like(im_2_warped)\n", + "\n", + " for v in range(im1.shape[0]):\n", + " for u in range(im1.shape[1]):\n", + " if im1[v, u] != 0:\n", + " im1t[v, u] = im1[v, u]\n", + "\n", + " img_out = np.zeros_like(im_2_warped)\n", + "\n", + " for v in range(img_out.shape[0]):\n", + " for u in range(img_out.shape[1]):\n", + " if im1t[v, u] == 0 and im_2_warped[v, u] == 0:\n", + " img_out[v, u] = 0\n", + "\n", + " elif im1t[v, u] != 0 and im_2_warped[v, u] == 0:\n", + " img_out[v, u] = im1[v, u]\n", + " elif im1t[v, u] == 0. and im_2_warped[v, u] != 0:\n", + " img_out[v, u] = im_2_warped[v, u]\n", + " else:\n", + " img_out[v, u] = (im_2_warped[v, u] + im1[v, u]) / 2\n", + "\n", + " return img_out\n" ] }, { @@ -83,14 +283,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" + "pygments_lexer": "ipython2", + "version": "2.7.14" } }, "nbformat": 4, diff --git a/room1.jpg b/room1.jpg new file mode 100644 index 0000000..bab4071 Binary files /dev/null and b/room1.jpg differ diff --git a/room2.jpg b/room2.jpg new file mode 100644 index 0000000..02200cc Binary files /dev/null and b/room2.jpg differ diff --git a/run_this.py b/run_this.py new file mode 100644 index 0000000..144aea6 --- /dev/null +++ b/run_this.py @@ -0,0 +1,25 @@ +from Stitcher import * +from convolution import * + +im1_fname = 'room1.jpg' +im2_fname = 'room2.jpg' + +# Convert both images to gray scale +im_1 = np.mean(plt.imread(im1_fname), -1) +im_2 = np.mean(plt.imread(im2_fname), -1) + +# Note: can uncomment this to speed things up +# Perform Gaussian blur to eliminate aliasing +# gauss = Filter.make_gauss((3, 3), 1) +# im_1 = convolve(im_1, gauss) +# im_2 = convolve(im_2, gauss) + +# Down sample image to speed things up a bit +# im_1 = block_reduce(im_1, (2, 2), func=np.mean) +# im_2 = block_reduce(im_2, (2, 2), func=np.mean) + +st = Stitcher(im_1, im_2) + +stitched_image = st.stitch() +plt.imshow(stitched_image, cmap='gray') +plt.show() \ No newline at end of file
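Step 3's docstring describes comparing normalized patch descriptors between images and filtering likely mismatches; the usual filter is Lowe's ratio test, which keeps a match only when the best SSD is clearly smaller than the second-best. The following is a compact, self-contained sketch of that idea rather than a drop-in replacement for match_keypoints; it assumes descriptor stacks shaped like the output of generate_descriptors.

import numpy as np

def ratio_test_matches(d1, d2, r=0.7):
    """Match descriptor stacks d1 (n1, l, l) and d2 (n2, l, l) by SSD of
    mean/std-normalized patches, keeping (i, j) only when the best SSD is
    less than r times the second-best SSD (Lowe's ratio test)."""
    # Normalize every patch to zero mean and unit variance, then flatten it.
    f1 = np.array([(p - p.mean()) / p.std() for p in d1]).reshape(len(d1), -1)
    f2 = np.array([(p - p.mean()) / p.std() for p in d2]).reshape(len(d2), -1)

    matches = []
    for i, a in enumerate(f1):
        ssd = np.sum((f2 - a) ** 2, axis=1)   # SSD against every descriptor in image 2
        best, second = np.argsort(ssd)[:2]    # indices of the two closest candidates
        if ssd[best] < r * ssd[second]:
            matches.append((i, best))
    return matches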
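The DLT construction in Stitcher.find_homography (Step 4) is easy to sanity-check on its own: project a few points through a known homography and confirm that the solver recovers it up to scale. Below is a minimal sketch of such a check, not part of the patch; the file name, test points, and H_true values are made up for illustration, and the dummy images exist only because the constructor stores two images.

# sanity_check_homography.py -- illustrative only, not part of this patch.
import numpy as np
from Stitcher import Stitcher

# A known ground-truth homography, normalized so H[2, 2] = 1.
H_true = np.array([[0.98, -0.02, 15.0],
                   [0.03,  1.01, -7.0],
                   [1e-4,  0.00,  1.0]])

# A handful of (u, v) points in image 1 ...
uv1 = np.array([[10, 10], [200, 30], [40, 180], [220, 210], [120, 90]], dtype=float)

# ... and their projections into image 2 (homogeneous divide).
proj = (H_true @ np.column_stack((uv1, np.ones(len(uv1)))).T).T
uv2 = proj[:, :2] / proj[:, 2:]

# The constructor only stores its two images, so small dummies suffice here.
st = Stitcher(np.zeros((2, 2)), np.zeros((2, 2)))
H_est = st.find_homography(uv1, uv2)

print(np.round(H_est, 4))                              # should be close to H_true
print("max abs error:", np.abs(H_est - H_true).max())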
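The closing markdown cell asks for a panorama built from at least four overlapping images by applying the two-image stitcher recursively. One way to arrange that is sketched below; the file names are hypothetical, the images are averaged to grayscale exactly as in run_this.py, and it assumes Stitcher.stitch() returns the blended composite so the result can serve as the left image of the next pass.

# stitch_many.py -- illustrative driver, not part of this patch.
import numpy as np
import matplotlib.pyplot as plt
from Stitcher import Stitcher

# Hypothetical file names; use your own overlapping shots, ordered left to right.
fnames = ['pano1.jpg', 'pano2.jpg', 'pano3.jpg', 'pano4.jpg']

# Stitcher expects grayscale arrays (see run_this.py), so average the color channels.
images = [np.mean(plt.imread(f), -1) for f in fnames]

panorama = images[0]
for nxt in images[1:]:
    # Treat the running panorama as image 1 and stitch the next image onto it.
    panorama = Stitcher(panorama, nxt).stitch()

plt.imshow(panorama, cmap='gray')
plt.show()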