112 changes: 111 additions & 1 deletion template_matching.ipynb
@@ -31,6 +31,116 @@
"**Waldo appears in every Where's Waldo image (obviously). Try using the same technique on 'waldo_2.jpg'. Does the algorithm work?** I confess that I pulled the image of waldo for the template directly from 'waldo_1.jpg', so for the correct scale, there is something close to an exact match (i.e. SSE=0). However, Waldo, while easily recognizable to the human eye after undergoing the small scale deformations associated with artistic license, is not so easily recognizable via template matching. We will return to a similar problem when discussing object recognition, and hopefully this example will motivate the need to come up with representations of objects (like Waldo) that are more robust.\n"
]
},
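{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the sum-of-squared-errors score discussed above compares the template $T$ with the image window at offset $(u, v)$:\n",
"\n",
"$$\\mathrm{SSE}(u, v) = \\sum_{x, y} \\big(I(u + x, v + y) - T(x, y)\\big)^2$$\n",
"\n",
"so an exact match gives $\\mathrm{SSE} = 0$, and the guessed location is the offset that minimizes this sum."
]
},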
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import math as math\n",
"import skimage.io as img_io\n",
"import skimage.filters as filters\n",
"from skimage.transform import resize\n",
"from skimage.feature import match_template\n",
"from scipy.ndimage.filters import correlate\n",
"from sklearn.preprocessing import normalize\n",
"\n",
"### INFO TO PLEASE TAKE INTO ACCOUNT ###\n",
"# I did this project solo. I went to class to try to meet my partners on wednesday (I got snowed in Friday and couldn't \n",
"# make it to class at all, and they didn't show up. I don't know either of them, as well as not knowing their last names \n",
"# so I couldn't email them, and with no moodle page I couldn't find them that way either, so I took on the project solo.\n",
"# To make up for that, I decided using libraries where possible would be the better route to actually having something that\n",
"# would function, as well as not having to reuse code without permission from former group mates who made the code. This is\n",
"# detailed slightly further below as well.\n",
"\n",
"\n",
"img = img_io.imread(\"waldo_1.jpg\", as_gray=True)\n",
"template = img_io.imread(\"waldo_template.jpg\", as_gray=True)\n",
"\n",
"#Build pyramid by hand rather than using skimage gaussian_pyramid function, as well as label for output\n",
"#Used library functions for gaussian blur and resize as well for speed, and wasn't comfortable reusing code written by \n",
"#previous group members since they did the convolution parts.\n",
"pyramid = []\n",
"pyramid.append((\"Template\",template))\n",
"for i in range(1,4):\n",
" x = filters.gaussian(pyramid[i-1][1], sigma=1,multichannel=True)\n",
" x = resize(x,(math.floor(x.shape[0]/2),math.floor(x.shape[1]/2)))\n",
" pyramid.append((\"Downscale #\" + str(i),x))\n",
"\n",
"#Show image and show each step of the pyramid\n",
"img_io.imshow(img)\n",
"plt.show()\n",
"for item in pyramid:\n",
" print(item[0]+\":\")\n",
" img_io.imshow(item[1])\n",
" plt.show()\n",
"\n",
"\n",
"\n",
"#Using skimage template match since it is well implemented. Mine took a VERY long time, left as comment to show what I \n",
"#attempted But used the library function for speed and corretness. The library function uses a \n",
"#normalized cross-correlation so I needed to use argmax to get the guessed location. I generally code with the \n",
"#philosophy generally knowing what it is doing is the goal, so it is better to use libraries and not reinvent the wheel \n",
"#as long as you know the process, rather than having to know the specifics at all times.\n",
"outputs = []\n",
"for item in pyramid:\n",
" cmp = item[1]\n",
" guess = match_template(img, cmp)\n",
" ij = np.unravel_index(np.argmax(guess), guess.shape)\n",
" x, y = ij[::-1]\n",
" outputs.append((item[0],x,y))\n",
"\n",
"print(\"Guesses per comparison in x,y format from skimage correlation:\")\n",
"for item in outputs:\n",
" print(item[0],\" Guess:\",item[1],\",\",item[2])\n",
" \n",
"print(\"\\n Starting second correlation \\n\")\n",
"#This is testing with scipy's correlate function instead and using sklearn to normalize to see if they are similar.\n",
"outputs2 = []\n",
"for item in pyramid:\n",
" cmp = item[1]\n",
" img = normalize(img)\n",
" cmp = normalize(cmp)\n",
" guess = correlate(img, cmp)\n",
" ij = np.unravel_index(np.argmax(guess), guess.shape)\n",
" x, y = ij[::-1]\n",
" outputs2.append((item[0],x,y))\n",
" \n",
"\n",
" \n",
"print(\"Guesses per comparison in x,y format from scipy correlation:\")\n",
"for item in outputs2:\n",
" print(item[0],\" Guess:\",item[1],\",\",item[2])\n",
"\n",
"#Based on manually checking with several pyramids, looks like after being downscaled twice it finds Waldo (Downscale #2) from\n",
"#the skimage template matching, but the scipy correlation isn't as accurate, while also taking significantly longer\n",
"\n",
"\n",
"#Image dimensions\n",
"# img_x = img.shape[0]\n",
"# img_y = img.shape[1]\n",
"\n",
"# My attempt, extremely slow and not 100% sure if right\n",
"# for item in pyramid:\n",
"# x_dim = item.shape[0]\n",
"# y_dim = item.shape[1]\n",
"# out = np.zeros((img_y - y_dim,img_x - x_dim))\n",
"# for i in range(img_y - y_dim):\n",
"# for j in range(img_x - x_dim):\n",
"# sum = 0.0\n",
"# for x in range(x_dim):\n",
"# for y in range(y_dim):\n",
"# sum += (img[i + x][j + y] - item[x][y])\n",
"# out[i][j] = sum\n",
"# print(\"Template Done\")\n",
"# min_ind = np.unravel_index(np.argmin(out, axis=None), out.shape)\n",
"# print(min_ind)\n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -55,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.5.2"
}
},
"nbformat": 4,