UMT-Computer-Vision-Spring-2019 · colligant · Jan 25, 2019 · Jan 27, 2019 · Jan 27, 2019 · Jan 28, 2019
diff --git a/IMG_0433.JPG b/IMG_0433.JPG
diff --git a/annotate.py b/annotate.py
@@ -0,0 +1,12 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+ims = ['campus_stereo_1.jpg', 'campus_stereo_2.jpg']
+
+for f in ims:
+    im = plt.imread(f)
+    plt.figure(figsize=(12, 8))
+    plt.imshow(im)
+    coords = plt.ginput(n=1, show_clicks=True)
+    print(coords)
+
diff --git a/coordinates.txt b/coordinates.txt
@@ -0,0 +1,5 @@
+3917,1574,273012.94,5195299.31,990
+2310,1464,273854.87,5195177.47,1233
+175,959,274060.24,5195364.16,1343
+639,1648,273790.64,5195311.87,1199
+1061,2137,273014.38,5195303.62,990
diff --git a/project1_bonus.ipynb b/project1_bonus.ipynb
@@ -0,0 +1,219 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.optimize import leastsq\n",
+    "import numpy as np\n",
+    "\n",
+    "class Camera(object):\n",
+    "    '''Reduce the misfit between the projection of the GCPs and their identified location in the image.\n",
+    "       Adjust the pose s.t. the squared difference between the projection of the GCP and its location\n",
+    "       in the image is minimized.\n",
+    "       Project the GCPs onto the sensor.\n",
+    "       Recall: Guess at pose vector.\n",
+    "       Then, reduce the misfit between the GCP coordinates that we predicted\n",
+    "       using our guessed-at coordinates, and the true coordinates.'''\n",
+    "    \n",
+    "    def __init__(self, focal_length=None, sensor_x=None, sensor_y=None, pose=None):\n",
+    "        self.p = pose # Pose: x, y, z, phi, theta, psi\n",
+    "        self.focal_length = focal_length                   # Focal Length in Pixels\n",
+    "        self.sensor_x = sensor_x\n",
+    "        self.sensor_y = sensor_y\n",
+    "        \n",
+    "    def projective_transform(self, X):\n",
+    "        \"\"\"  \n",
+    "        This function performs the projective transform on generalized coordinates in the camera reference frame.\n",
+    "        Expects x, y, z (non-generalized).\n",
+    "        \"\"\"\n",
+    "        x = X[:, 0]/X[:, 2]\n",
+    "        y = X[:, 1]/X[:, 2]\n",
+    "        u = self.focal_length*x + self.sensor_x / 2\n",
+    "        v = self.focal_length*y + self.sensor_y / 2 # the coordinates that input intensities map to\n",
+    "        u = np.hstack(u)\n",
+    "        v = np.hstack(v)\n",
+    "        return u, v\n",
+    "    \n",
+    "    def rotational_transform(self, X):\n",
+    "        '''Expects non-homogeneous coordinates.'''\n",
+    "        if len(X.shape) < 2:\n",
+    "            X = np.reshape(X, (1, X.shape[0]))\n",
+    "        s = np.sin\n",
+    "        c = np.cos\n",
+    "        X_h = np.zeros((X.shape[0], X.shape[1]+1))\n",
+    "        X_h[:, :X.shape[1]] = X\n",
+    "        X_h[:, 3] = np.ones((X.shape[0]))\n",
+    "        X_cam = self.p\n",
+    "        phi = X_cam[3]\n",
+    "        theta = X_cam[4]\n",
+    "        p = X_cam[5]\n",
+    "        trans = np.mat(([1, 0, 0, -X_cam[0]], [0, 1, 0, -X_cam[1]], \n",
+    "                          [0, 0, 1, -X_cam[2]], [0, 0, 0, 1]))\n",
+    "        r_yaw = np.mat(([c(phi), -s(phi), 0, 0], [s(phi), c(phi), 0, 0], [0, 0, 1, 0]))\n",
+    "        r_pitch = np.mat(([1, 0, 0], [0, c(theta), s(theta)], [0, -s(theta), c(theta)]))\n",
+    "        r_roll = np.mat(([c(p), 0, -s(p)], [0, 1, 0], [s(p), 0, c(p)]))\n",
+    "        r_axis = np.mat(([1, 0, 0], [0, 0, -1], [0, 1, 0]))\n",
+    "        C = r_axis @ r_roll @ r_pitch @ r_yaw @ trans\n",
+    "        Xt = C @ X_h.T\n",
+    "        return Xt.T\n",
+    "    \n",
+    "    def _func(self, p, x):\n",
+    "        self.p = p \n",
+    "        X = self.rotational_transform(x)\n",
+    "        u, v = self.projective_transform(X)\n",
+    "        u = u.T\n",
+    "        v = v.T\n",
+    "        z = np.asarray(np.hstack((u, v)))\n",
+    "        return z.ravel()\n",
+    "    \n",
+    "    def _errfunc(self, p, x, y, func, err):\n",
+    "        xx = func(p, x)\n",
+    "        ss = y.ravel() - xx\n",
+    "        return ss\n",
+    "    \n",
+    "    def estimate_pose(self, X_gcp, u_gcp):\n",
+    "        \"\"\"\n",
+    "        This function adjusts the pose vector such that the difference between the observed pixel coordinates u_gcp \n",
+    "        and the projected pixels coordinates of X_gcp is minimized.\n",
+    "        \"\"\"\n",
+    "        err = np.ones(X_gcp.shape)\n",
+    "        out = leastsq(self._errfunc, self.p, args=(X_gcp, u_gcp, self._func, err), full_output=1)\n",
+    "        self.p = out[0]\n",
+    "        return out[0]\n",
+    "    \n",
+    "\n",
+    "def sfm(c1, c2, guess, u_gcp):\n",
+    "    '''Estimates world coordinates of a given point in an\n",
+    "       image given their image coordinates and a guess at\n",
+    "       the world coordinates. This requires two calibrated\n",
+    "       cameras.''' \n",
+    "    \n",
+    "    def func(x, c1, c2):\n",
+    "        '''Return u, v given guess at X, Y, Z'''\n",
+    "        xt_1 = c1.rotational_transform(x)\n",
+    "        xt_2 = c2.rotational_transform(x)\n",
+    "        u1, v1 = c1.projective_transform(xt_1)\n",
+    "        u2, v2 = c2.projective_transform(xt_2)\n",
+    "        u1 = u1.T\n",
+    "        u2 = u2.T\n",
+    "        v1 = v1.T\n",
+    "        v2 = v2.T\n",
+    "        z = np.asarray(np.hstack((u1, v1, u2, v2)))\n",
+    "        return z.ravel()\n",
+    "\n",
+    "    def errfunc(p, c1, c2, y):\n",
+    "        xx = func(p, c1, c2)\n",
+    "        ss = y.ravel() - xx\n",
+    "        return ss\n",
+    "    \n",
+    "    out = leastsq(errfunc, guess, args=(c1, c2, u_gcp), full_output=1)\n",
+    "    \n",
+    "    return out[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "pose estimated from gcp 1: [2.72735198e+05 5.19395745e+06 1.04764910e+03 1.48067511e+00\n",
+      " 2.78576743e-01 3.15407517e+00]\n",
+      "\n",
+      "pose estimated from gcp 1: [2.72557604e+05 5.19393093e+06 1.03205593e+03 2.84411119e+02\n",
+      " 1.53909151e+00 2.85108707e+02]\n",
+      "\n",
+      " Predicted center of clock tower [1]: [2.72558355e+05 5.19393378e+06 9.98084621e+02]\n",
+      "\n",
+      " Predicted center of clock tower [2]: [2.72558309e+05 5.19393380e+06 9.98518681e+02]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "f_length = 1.6 * 55 # from canon.com\n",
+    "\n",
+    "img_width = 4272\n",
+    "img_height = 2848\n",
+    "focal_length = f_length/36*img_width\n",
+    "\n",
+    "sensor_y = img_height\n",
+    "sensor_x = img_width\n",
+    "focal_length = (55 / 22.2) * 4272 # from canon\n",
+    "coords_1 = np.loadtxt(\"gcp_stereo_1.txt\", delimiter=',')\n",
+    "coords_2 = np.loadtxt(\"gcp_stereo_2.txt\", delimiter=',')\n",
+    "pose_1 = [273171, 5193938, 900, 1, 1, 1]\n",
+    "pose_2 = [273131, 5101992, 900, 0.5, np.pi/2, 0]\n",
+    "u_gcp_1 = coords_1[:, :2]\n",
+    "X_gcp_1 = coords_1[:, 2:]\n",
+    "u_gcp_2 = coords_2[:, :2]\n",
+    "X_gcp_2 = coords_2[:, 2:]\n",
+    "\n",
+    "c1 = Camera(focal_length=focal_length, sensor_x=sensor_x, \n",
+    "            sensor_y=sensor_y, pose=pose_1)\n",
+    "c2 = Camera(focal_length=focal_length, sensor_x=sensor_x, \n",
+    "            sensor_y=sensor_y, pose=pose_2)\n",
+    "p1 = c1.estimate_pose(X_gcp_1, u_gcp_1)\n",
+    "p2 = c2.estimate_pose(X_gcp_2, u_gcp_2)\n",
+    "\n",
+    "print(\"pose estimated from gcp 1:\", p1)\n",
+    "print(\"\\npose estimated from gcp 1:\", p2)\n",
+    "\n",
+    "uv =  np.array([[2018.9675324675327, 1295.0324675324675], \n",
+    "               [1202.3051948051948, 1219.5259740259739]])\n",
+    "uv2 = np.array([[2013.3159, 1269.1184], [1199.59303, 1198.185]]) # another set of coords\n",
+    "# meter vector: the E, N, Elev\n",
+    "# to minimize: The difference between the predicted u, v values and \n",
+    "# the known camera u, v for a given X, Y, z\n",
+    "guess = np.array([270000, 5197990, 1010])\n",
+    "p_1 = sfm(c1, c2, guess, uv)\n",
+    "print(\"\\n Predicted center of clock tower [1]:\",  p_1)\n",
+    "p_2 = sfm(c1, c2, guess, uv2)\n",
+    "print(\"\\n Predicted center of clock tower [2]:\",  p_2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is ok. Elevation is missing by ~10m. That makes sense given by the pose estimation\n",
+    "is off by a bit well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "base"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/project1_solution.ipynb b/project1_solution.ipynb
diff --git a/project_description.ipynb b/project_description.ipynb
@@ -42,24 +42,38 @@
    "outputs": [],
    "source": [
     "class Camera(object):\n",
-    "    def __init__(self):\n",
+    "    '''Reduce the misfit between the projection of the GCPs and their identified location in the image.\n",
+    "       Adjust the pose s.t. the squared difference between the projection of the GCP and its location\n",
+    "       in the image is minimized.\n",
+    "       Project the GCPs onto the sensor.\n",
+    "       Recall: Guess at pose vector.\n",
+    "       Then, reduce the misfit between the GCP coordinates that we predicted\n",
+    "       using our guessed-at coordinates, and the true coordinates.'''\n",
+    "    def __init__(self, focal_length=None, sensor_x=None, sensor_y=None):\n",
     "        self.p = None                   # Pose\n",
-    "        self.f = None                   # Focal Length in Pixels\n",
-    "        self.c = np.array([None,None])  # \n",
+    "        self.focal_length = focal_length                   # Focal Length in Pixels\n",
+    "        self.sensor_x = sensor_x\n",
+    "        self.sensor_y = sensor_y\n",
     "        \n",
-    "    def projective_transform(self,x):\n",
+    "    def projective_transform(self, x):\n",
     "        \"\"\"  \n",
     "        This function performs the projective transform on generalized coordinates in the camera reference frame.\n",
     "        \"\"\"\n",
-    "        pass\n",
+    "        x = x/z\n",
+    "        y = y/z\n",
+    "        u = focal_length*x + sensor_x / 2\n",
+    "        v = focal_length*y + sensor_y / 2 # the coordinates that input intensities map to\n",
+    "        u = u.astype(np.int) # these should be indices in our output array\n",
+    "        v = v.astype(np.int)\n",
+    "        return u, v\n",
     "    \n",
-    "    def rotational_transform(self,X):\n",
+    "    def rotational_transform(self, X):\n",
     "        \"\"\"  \n",
     "        This function performs the translation and rotation from world coordinates into generalized camera coordinates.\n",
     "        \"\"\"\n",
     "        pass\n",
     "    \n",
-    "    def estimate_pose(self,X_gcp,u_gcp):\n",
+    "    def estimate_pose(self, X_gcp, u_gcp):\n",
     "        \"\"\"\n",
     "        This function adjusts the pose vector such that the difference between the observed pixel coordinates u_gcp \n",
     "        and the projected pixels coordinates of X_gcp is minimized.\n",
@@ -238,7 +252,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.8"
   }
  },
  "nbformat": 4,