From 51d72769a3dbaf68879c69c2211dfb8718423da0 Mon Sep 17 00:00:00 2001 From: Thorinori Date: Sun, 3 Mar 2019 14:59:18 -0700 Subject: [PATCH] Template matching code done --- template_matching.ipynb | 112 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/template_matching.ipynb b/template_matching.ipynb index 156f833..83e9f77 100644 --- a/template_matching.ipynb +++ b/template_matching.ipynb @@ -31,6 +31,116 @@ "**Waldo appears in every Where's Waldo image (obviously). Try using the same technique on 'waldo_2.jpg'. Does the algorithm work?** I confess that I pulled the image of waldo for the template directly from 'waldo_1.jpg', so for the correct scale, there is something close to an exact match (i.e. SSE=0). However, Waldo, while easily recognizable to the human eye after undergoing the small scale deformations associated with artistic license, is not so easily recognizable via template matching. We will return to a similar problem when discussing object recognition, and hopefully this example will motivate the need to come up with representations of objects (like Waldo) that are more robust.\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import math as math\n", + "import skimage.io as img_io\n", + "import skimage.filters as filters\n", + "from skimage.transform import resize\n", + "from skimage.feature import match_template\n", + "from scipy.ndimage.filters import correlate\n", + "from sklearn.preprocessing import normalize\n", + "\n", + "### INFO TO PLEASE TAKE INTO ACCOUNT ###\n", + "# I did this project solo. I went to class to try to meet my partners on Wednesday (I got snowed in on Friday and couldn't \n", + "# make it to class at all), and they didn't show up. 
I don't know either of them, as well as not knowing their last names \n", + "# so I couldn't email them, and with no Moodle page I couldn't find them that way either, so I took on the project solo.\n", + "# To make up for that, I decided using libraries where possible would be the better route to actually having something that\n", + "# would function, as well as not having to reuse code without permission from former group mates who made the code. This is\n", + "# detailed slightly further below as well.\n", + "\n", + "\n", + "img = img_io.imread(\"waldo_1.jpg\", as_gray=True)\n", + "template = img_io.imread(\"waldo_template.jpg\", as_gray=True)\n", + "\n", + "#Build pyramid by hand rather than using skimage's pyramid_gaussian function, and label each level for the output.\n", + "#Used library functions for gaussian blur and resize as well for speed, and wasn't comfortable reusing code written by \n", + "#previous group members since they did the convolution parts. The images are 2-D grayscale, so no multichannel flag.\n", + "pyramid = []\n", + "pyramid.append((\"Template\",template))\n", + "for i in range(1,4):\n", + " x = filters.gaussian(pyramid[i-1][1], sigma=1)\n", + " x = resize(x,(x.shape[0] // 2,x.shape[1] // 2))\n", + " pyramid.append((\"Downscale #\" + str(i),x))\n", + "\n", + "#Show image and show each step of the pyramid\n", + "img_io.imshow(img)\n", + "plt.show()\n", + "for item in pyramid:\n", + " print(item[0]+\":\")\n", + " img_io.imshow(item[1])\n", + " plt.show()\n", + "\n", + "\n", + "\n", + "#Using skimage template match since it is well implemented. Mine took a VERY long time, left as comment to show what I \n", + "#attempted, but used the library function for speed and correctness. The library function uses a \n", + "#normalized cross-correlation so I needed to use argmax to get the guessed location. 
I generally code with the \n", + "#philosophy that generally knowing what it is doing is the goal, so it is better to use libraries and not reinvent the wheel \n", + "#as long as you know the process, rather than having to know the specifics at all times.\n", + "outputs = []\n", + "for item in pyramid:\n", + " cmp = item[1]\n", + " guess = match_template(img, cmp)\n", + " ij = np.unravel_index(np.argmax(guess), guess.shape)\n", + " x, y = ij[::-1]\n", + " outputs.append((item[0],x,y))\n", + "\n", + "print(\"Guesses per comparison in x,y format from skimage correlation:\")\n", + "for item in outputs:\n", + " print(item[0],\" Guess:\",item[1],\",\",item[2])\n", + " \n", + "print(\"\\n Starting second correlation \\n\")\n", + "#This is testing with scipy's correlate function instead and using sklearn to normalize to see if they are similar.\n", + "outputs2 = []\n", + "img_norm = normalize(img) #normalized once, and kept separate so img itself is not overwritten\n", + "for item in pyramid:\n", + " cmp = normalize(item[1])\n", + " guess = correlate(img_norm, cmp)\n", + " ij = np.unravel_index(np.argmax(guess), guess.shape)\n", + " #correlate centers the template on each pixel; shift the peak to the top-left corner like match_template reports\n", + " x, y = ij[1] - cmp.shape[1] // 2, ij[0] - cmp.shape[0] // 2\n", + " outputs2.append((item[0],x,y))\n", + " \n", + "\n", + " \n", + "print(\"Guesses per comparison in x,y format from scipy correlation:\")\n", + "for item in outputs2:\n", + " print(item[0],\" Guess:\",item[1],\",\",item[2])\n", + "\n", + "#Based on manually checking with several pyramids, looks like after being downscaled twice it finds Waldo (Downscale #2) from\n", + "#the skimage template matching; the scipy correlation looked less accurate (TODO: recheck with the shifted guesses) and took significantly longer\n", + "\n", + "\n", + "#Image dimensions\n", + "# img_x = img.shape[0]\n", + "# img_y = img.shape[1]\n", + "\n", + "# My attempt, extremely slow and not 100% sure if right\n", + "# for item in pyramid:\n", + "# x_dim = item.shape[0]\n", + "# y_dim = item.shape[1]\n", + "# out = np.zeros((img_y - y_dim,img_x - x_dim))\n", + "# for i in range(img_y - y_dim):\n", + "# for j in range(img_x - x_dim):\n", + "# 
sum = 0.0\n", + "# for x in range(x_dim):\n", + "# for y in range(y_dim):\n", + "# sum += (img[i + x][j + y] - item[x][y])\n", + "# out[i][j] = sum\n", + "# print(\"Template Done\")\n", + "# min_ind = np.unravel_index(np.argmin(out, axis=None), out.shape)\n", + "# print(min_ind)\n", + " \n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -55,7 +165,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.5.2" } }, "nbformat": 4,