112 changes: 111 additions & 1 deletion template_matching.ipynb
@@ -31,6 +31,116 @@
"**Waldo appears in every Where's Waldo image (obviously). Try using the same technique on 'waldo_2.jpg'. Does the algorithm work?** I confess that I pulled the image of waldo for the template directly from 'waldo_1.jpg', so for the correct scale, there is something close to an exact match (i.e. SSE=0). However, Waldo, while easily recognizable to the human eye after undergoing the small scale deformations associated with artistic license, is not so easily recognizable via template matching. We will return to a similar problem when discussing object recognition, and hopefully this example will motivate the need to come up with representations of objects (like Waldo) that are more robust.\n"
]
},
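{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the sum-of-squared-errors score discussed above compares the template $T$ with the image window at offset $(u, v)$:\n",
"\n",
"$$\\mathrm{SSE}(u, v) = \\sum_{x, y} \\big(I(u + x, v + y) - T(x, y)\\big)^2$$\n",
"\n",
"so an exact match gives $\\mathrm{SSE} = 0$, and the guessed location is the offset that minimizes this sum."
]
},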
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import math as math\n",
"import skimage.io as img_io\n",
"import skimage.filters as filters\n",
"from skimage.transform import resize\n",
"from skimage.feature import match_template\n",
"from scipy.ndimage.filters import correlate\n",
"from sklearn.preprocessing import normalize\n",
"\n",
"### INFO TO PLEASE TAKE INTO ACCOUNT ###\n",
"# I did this project solo. I went to class to try to meet my partners on wednesday (I got snowed in Friday and couldn't \n",
"# make it to class at all, and they didn't show up. I don't know either of them, as well as not knowing their last names \n",
"# so I couldn't email them, and with no moodle page I couldn't find them that way either, so I took on the project solo.\n",
"# To make up for that, I decided using libraries where possible would be the better route to actually having something that\n",
"# would function, as well as not having to reuse code without permission from former group mates who made the code. This is\n",
"# detailed slightly further below as well.\n",
"\n",
"\n",
"img = img_io.imread(\"waldo_1.jpg\", as_gray=True)\n",
"template = img_io.imread(\"waldo_template.jpg\", as_gray=True)\n",
"\n",
"#Build pyramid by hand rather than using skimage gaussian_pyramid function, as well as label for output\n",
"#Used library functions for gaussian blur and resize as well for speed, and wasn't comfortable reusing code written by \n",
"#previous group members since they did the convolution parts.\n",
"pyramid = []\n",
"pyramid.append((\"Template\",template))\n",
"for i in range(1,4):\n",
" x = filters.gaussian(pyramid[i-1][1], sigma=1,multichannel=True)\n",
" x = resize(x,(math.floor(x.shape[0]/2),math.floor(x.shape[1]/2)))\n",
" pyramid.append((\"Downscale #\" + str(i),x))\n",
"\n",
"#Show image and show each step of the pyramid\n",
"img_io.imshow(img)\n",
"plt.show()\n",
"for item in pyramid:\n",
" print(item[0]+\":\")\n",
" img_io.imshow(item[1])\n",
" plt.show()\n",
"\n",
"\n",
"\n",
"#Using skimage template match since it is well implemented. Mine took a VERY long time, left as comment to show what I \n",
"#attempted But used the library function for speed and corretness. The library function uses a \n",
"#normalized cross-correlation so I needed to use argmax to get the guessed location. I generally code with the \n",
"#philosophy generally knowing what it is doing is the goal, so it is better to use libraries and not reinvent the wheel \n",
"#as long as you know the process, rather than having to know the specifics at all times.\n",
"outputs = []\n",
"for item in pyramid:\n",
" cmp = item[1]\n",
" guess = match_template(img, cmp)\n",
" ij = np.unravel_index(np.argmax(guess), guess.shape)\n",
" x, y = ij[::-1]\n",
" outputs.append((item[0],x,y))\n",
"\n",
"print(\"Guesses per comparison in x,y format from skimage correlation:\")\n",
"for item in outputs:\n",
" print(item[0],\" Guess:\",item[1],\",\",item[2])\n",
" \n",
"print(\"\\n Starting second correlation \\n\")\n",
"#This is testing with scipy's correlate function instead and using sklearn to normalize to see if they are similar.\n",
"outputs2 = []\n",
"for item in pyramid:\n",
" cmp = item[1]\n",
" img = normalize(img)\n",
" cmp = normalize(cmp)\n",
" guess = correlate(img, cmp)\n",
" ij = np.unravel_index(np.argmax(guess), guess.shape)\n",
" x, y = ij[::-1]\n",
" outputs2.append((item[0],x,y))\n",
" \n",
"\n",
" \n",
"print(\"Guesses per comparison in x,y format from scipy correlation:\")\n",
"for item in outputs2:\n",
" print(item[0],\" Guess:\",item[1],\",\",item[2])\n",
"\n",
"#Based on manually checking with several pyramids, looks like after being downscaled twice it finds Waldo (Downscale #2) from\n",
"#the skimage template matching, but the scipy correlation isn't as accurate, while also taking significantly longer\n",
"\n",
"\n",
"#Image dimensions\n",
"# img_x = img.shape[0]\n",
"# img_y = img.shape[1]\n",
"\n",
"# My attempt, extremely slow and not 100% sure if right\n",
"# for item in pyramid:\n",
"# x_dim = item.shape[0]\n",
"# y_dim = item.shape[1]\n",
"# out = np.zeros((img_y - y_dim,img_x - x_dim))\n",
"# for i in range(img_y - y_dim):\n",
"# for j in range(img_x - x_dim):\n",
"# sum = 0.0\n",
"# for x in range(x_dim):\n",
"# for y in range(y_dim):\n",
"# sum += (img[i + x][j + y] - item[x][y])\n",
"# out[i][j] = sum\n",
"# print(\"Template Done\")\n",
"# min_ind = np.unravel_index(np.argmin(out, axis=None), out.shape)\n",
"# print(min_ind)\n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -55,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.5.2"
}
},
"nbformat": 4,