<!DOCTYPE html><html><head>
<title>edgedetect</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script type="text/x-mathjax-config">
MathJax.Hub.Config({"extensions":["tex2jax.js"],"jax":["input/TeX","output/HTML-CSS"],"messageStyle":"none","tex2jax":{"processEnvironments":false,"processEscapes":true,"inlineMath":[["$","$"],["\\(","\\)"]],"displayMath":[["$$","$$"],["\\[","\\]"]]},"TeX":{"extensions":["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]},"HTML-CSS":{"availableFonts":["TeX"]}});
</script>
<script type="text/javascript" async src="file:////Users/samuel/.vscode/extensions/shd101wyy.markdown-preview-enhanced-0.5.0/node_modules/@shd101wyy/mume/dependencies/mathjax/MathJax.js" charset="UTF-8"></script>
<style>
/**
* prism.js Github theme based on GitHub's theme.
* @author Sam Clarke
*/
code[class*="language-"],
pre[class*="language-"] {
color: #333;
background: none;
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
text-align: left;
white-space: pre;
word-spacing: normal;
word-break: normal;
word-wrap: normal;
line-height: 1.4;
-moz-tab-size: 8;
-o-tab-size: 8;
tab-size: 8;
-webkit-hyphens: none;
-moz-hyphens: none;
-ms-hyphens: none;
hyphens: none;
}
/* Code blocks */
pre[class*="language-"] {
padding: .8em;
overflow: auto;
/* border: 1px solid #ddd; */
border-radius: 3px;
/* background: #fff; */
background: #f5f5f5;
}
/* Inline code */
:not(pre) > code[class*="language-"] {
padding: .1em;
border-radius: .3em;
white-space: normal;
background: #f5f5f5;
}
.token.comment,
.token.blockquote {
color: #969896;
}
.token.cdata {
color: #183691;
}
.token.doctype,
.token.punctuation,
.token.variable,
.token.macro.property {
color: #333;
}
.token.operator,
.token.important,
.token.keyword,
.token.rule,
.token.builtin {
color: #a71d5d;
}
.token.string,
.token.url,
.token.regex,
.token.attr-value {
color: #183691;
}
.token.property,
.token.number,
.token.boolean,
.token.entity,
.token.atrule,
.token.constant,
.token.symbol,
.token.command,
.token.code {
color: #0086b3;
}
.token.tag,
.token.selector,
.token.prolog {
color: #63a35c;
}
.token.function,
.token.namespace,
.token.pseudo-element,
.token.class,
.token.class-name,
.token.pseudo-class,
.token.id,
.token.url-reference .token.variable,
.token.attr-name {
color: #795da3;
}
.token.entity {
cursor: help;
}
.token.title,
.token.title .token.punctuation {
font-weight: bold;
color: #1d3e81;
}
.token.list {
color: #ed6a43;
}
.token.inserted {
background-color: #eaffea;
color: #55a532;
}
.token.deleted {
background-color: #ffecec;
color: #bd2c00;
}
.token.bold {
font-weight: bold;
}
.token.italic {
font-style: italic;
}
/* JSON */
.language-json .token.property {
color: #183691;
}
.language-markup .token.tag .token.punctuation {
color: #333;
}
/* CSS */
code.language-css,
.language-css .token.function {
color: #0086b3;
}
/* YAML */
.language-yaml .token.atrule {
color: #63a35c;
}
code.language-yaml {
color: #183691;
}
/* Ruby */
.language-ruby .token.function {
color: #333;
}
/* Markdown */
.language-markdown .token.url {
color: #795da3;
}
/* Makefile */
.language-makefile .token.symbol {
color: #795da3;
}
.language-makefile .token.variable {
color: #183691;
}
.language-makefile .token.builtin {
color: #0086b3;
}
/* Bash */
.language-bash .token.keyword {
color: #0086b3;
}
/* highlight */
pre[data-line] {
position: relative;
padding: 1em 0 1em 3em;
}
pre[data-line] .line-highlight-wrapper {
position: absolute;
top: 0;
left: 0;
background-color: transparent;
display: block;
width: 100%;
}
pre[data-line] .line-highlight {
position: absolute;
left: 0;
right: 0;
padding: inherit 0;
margin-top: 1em;
background: hsla(24, 20%, 50%,.08);
background: linear-gradient(to right, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0));
pointer-events: none;
line-height: inherit;
white-space: pre;
}
pre[data-line] .line-highlight:before,
pre[data-line] .line-highlight[data-end]:after {
content: attr(data-start);
position: absolute;
top: .4em;
left: .6em;
min-width: 1em;
padding: 0 .5em;
background-color: hsla(24, 20%, 50%,.4);
color: hsl(24, 20%, 95%);
font: bold 65%/1.5 sans-serif;
text-align: center;
vertical-align: .3em;
border-radius: 999px;
text-shadow: none;
box-shadow: 0 1px white;
}
pre[data-line] .line-highlight[data-end]:after {
content: attr(data-end);
top: auto;
bottom: .4em;
}html body{font-family:"Helvetica Neue",Helvetica,"Segoe UI",Arial,freesans,sans-serif;font-size:16px;line-height:1.6;color:#333;background-color:#fff;overflow:initial;box-sizing:border-box;word-wrap:break-word}html body>:first-child{margin-top:0}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{line-height:1.2;margin-top:1em;margin-bottom:16px;color:#000}html body h1{font-size:2.25em;font-weight:300;padding-bottom:.3em}html body h2{font-size:1.75em;font-weight:400;padding-bottom:.3em}html body h3{font-size:1.5em;font-weight:500}html body h4{font-size:1.25em;font-weight:600}html body h5{font-size:1.1em;font-weight:600}html body h6{font-size:1em;font-weight:600}html body h1,html body h2,html body h3,html body h4,html body h5{font-weight:600}html body h5{font-size:1em}html body h6{color:#5c5c5c}html body strong{color:#000}html body del{color:#5c5c5c}html body a:not([href]){color:inherit;text-decoration:none}html body a{color:#08c;text-decoration:none}html body a:hover{color:#00a3f5;text-decoration:none}html body img{max-width:100%}html body>p{margin-top:0;margin-bottom:16px;word-wrap:break-word}html body>ul,html body>ol{margin-bottom:16px}html body ul,html body ol{padding-left:2em}html body ul.no-list,html body ol.no-list{padding:0;list-style-type:none}html body ul ul,html body ul ol,html body ol ol,html body ol ul{margin-top:0;margin-bottom:0}html body li{margin-bottom:0}html body li.task-list-item{list-style:none}html body li>p{margin-top:0;margin-bottom:0}html body .task-list-item-checkbox{margin:0 .2em .25em -1.8em;vertical-align:middle}html body .task-list-item-checkbox:hover{cursor:pointer}html body blockquote{margin:16px 0;font-size:inherit;padding:0 15px;color:#5c5c5c;border-left:4px solid #d6d6d6}html body blockquote>:first-child{margin-top:0}html body blockquote>:last-child{margin-bottom:0}html body hr{height:4px;margin:32px 0;background-color:#d6d6d6;border:0 none}html body table{margin:10px 0 15px 0;border-collapse:collapse;border-spacing:0;display:block;width:100%;overflow:auto;word-break:normal;word-break:keep-all}html body table th{font-weight:bold;color:#000}html body table td,html body table th{border:1px solid #d6d6d6;padding:6px 13px}html body dl{padding:0}html body dl dt{padding:0;margin-top:16px;font-size:1em;font-style:italic;font-weight:bold}html body dl dd{padding:0 16px;margin-bottom:16px}html body code{font-family:Menlo,Monaco,Consolas,'Courier New',monospace;font-size:.85em !important;color:#000;background-color:#f0f0f0;border-radius:3px;padding:.2em 0}html body code::before,html body code::after{letter-spacing:-0.2em;content:"\00a0"}html body pre>code{padding:0;margin:0;font-size:.85em !important;word-break:normal;white-space:pre;background:transparent;border:0}html body .highlight{margin-bottom:16px}html body .highlight pre,html body pre{padding:1em;overflow:auto;font-size:.85em !important;line-height:1.45;border:#d6d6d6;border-radius:3px}html body .highlight pre{margin-bottom:0;word-break:normal}html body pre code,html body pre tt{display:inline;max-width:initial;padding:0;margin:0;overflow:initial;line-height:inherit;word-wrap:normal;background-color:transparent;border:0}html body pre code:before,html body pre tt:before,html body pre code:after,html body pre tt:after{content:normal}html body p,html body blockquote,html body ul,html body ol,html body dl,html body pre{margin-top:0;margin-bottom:16px}html body kbd{color:#000;border:1px solid #d6d6d6;border-bottom:2px solid #c7c7c7;padding:2px 
4px;background-color:#f0f0f0;border-radius:3px}@media print{html body{background-color:#fff}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{color:#000;page-break-after:avoid}html body blockquote{color:#5c5c5c}html body pre{page-break-inside:avoid}html body table{display:table}html body img{display:block;max-width:100%;max-height:100%}html body pre,html body code{word-wrap:break-word;white-space:pre}}.markdown-preview{width:100%;height:100%;box-sizing:border-box}.markdown-preview .pagebreak,.markdown-preview .newpage{page-break-before:always}.markdown-preview pre.line-numbers{position:relative;padding-left:3.8em;counter-reset:linenumber}.markdown-preview pre.line-numbers>code{position:relative}.markdown-preview pre.line-numbers .line-numbers-rows{position:absolute;pointer-events:none;top:1em;font-size:100%;left:0;width:3em;letter-spacing:-1px;border-right:1px solid #999;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.markdown-preview pre.line-numbers .line-numbers-rows>span{pointer-events:none;display:block;counter-increment:linenumber}.markdown-preview pre.line-numbers .line-numbers-rows>span:before{content:counter(linenumber);color:#999;display:block;padding-right:.8em;text-align:right}.markdown-preview .mathjax-exps .MathJax_Display{text-align:center !important}.markdown-preview:not([for="preview"]) .code-chunk .btn-group{display:none}.markdown-preview:not([for="preview"]) .code-chunk .status{display:none}.markdown-preview:not([for="preview"]) .code-chunk .output-div{margin-bottom:16px}.scrollbar-style::-webkit-scrollbar{width:8px}.scrollbar-style::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}.scrollbar-style::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,0.66);border:4px solid rgba(150,150,150,0.66);background-clip:content-box}html body[for="html-export"]:not([data-presentation-mode]){position:relative;width:100%;height:100%;top:0;left:0;margin:0;padding:0;overflow:auto}html body[for="html-export"]:not([data-presentation-mode]) .markdown-preview{position:relative;top:0}@media screen and (min-width:914px){html body[for="html-export"]:not([data-presentation-mode]) .markdown-preview{padding:2em calc(50% - 457px + 2em)}}@media screen and (max-width:914px){html body[for="html-export"]:not([data-presentation-mode]) .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for="html-export"]:not([data-presentation-mode]) .markdown-preview{font-size:14px !important;padding:1em}}@media print{html body[for="html-export"]:not([data-presentation-mode]) #sidebar-toc-btn{display:none}}html body[for="html-export"]:not([data-presentation-mode]) #sidebar-toc-btn{position:fixed;bottom:8px;left:8px;font-size:28px;cursor:pointer;color:inherit;z-index:99;width:32px;text-align:center;opacity:.4}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] #sidebar-toc-btn{opacity:1}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc{position:fixed;top:0;left:0;width:300px;height:100%;padding:32px 0 48px 0;font-size:14px;box-shadow:0 0 4px rgba(150,150,150,0.33);box-sizing:border-box;overflow:auto;background-color:inherit}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar{width:8px}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] 
.md-sidebar-toc::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,0.66);border:4px solid rgba(150,150,150,0.66);background-clip:content-box}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc a{text-decoration:none}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc ul{padding:0 1.6em;margin-top:.8em}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc li{margin-bottom:.8em}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc ul{list-style-type:none}html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{left:300px;width:calc(100% - 300px);padding:2em calc(50% - 457px - 150px);margin:0;box-sizing:border-box}@media screen and (max-width:1274px){html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for="html-export"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{width:100%}}html body[for="html-export"]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .markdown-preview{left:50%;transform:translateX(-50%)}html body[for="html-export"]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .md-sidebar-toc{display:none}
/* Please visit the URL below for more information: */
/* https://shd101wyy.github.io/markdown-preview-enhanced/#/customize-css */
.markdown-preview.markdown-preview h1,
.markdown-preview.markdown-preview h2,
.markdown-preview.markdown-preview h3,
.markdown-preview.markdown-preview h4,
.markdown-preview.markdown-preview h5,
.markdown-preview.markdown-preview h6 {
font-weight: bolder;
text-decoration-line: underline;
}
</style>
</head>
<body for="html-export">
<div class="mume markdown-preview ">
<h1 class="mume-header" id="definition">Definition</h1>
<p>An edge can be defined as a boundary between regions in an image<sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup>. The edge detection techniques we'll learn in this course build upon what we've learned in our lessons on kernel convolution. Edge detection is the process of using kernels to reduce the information in our data, preserving only the necessary structural properties of the image<sup class="footnote-ref"><a href="#fn1" id="fnref1:1">[1:1]</a></sup>.</p>
<h1 class="mume-header" id="gradient-based-edge-detection">Gradient-based Edge Detection</h1>
<p>The gradient points in the direction of the most rapid increase in intensity. When we apply a gradient-based edge detection method, we are searching for the maxima and minima in the first derivative of the image.</p>
<p>When we apply our convolution to the image, we are looking for regions where there's a sharp change in intensity or color. Arguably the most common edge detection method using this approach is the Sobel Operator.</p>
<h2 class="mume-header" id="sobel-operator">Sobel Operator</h2>
<p>The <code>Sobel</code> operator applies a filtering operation to produce an output image where the edges are emphasized. It convolves our original image using two 3x3 kernels to capture approximations of the derivatives in the horizontal and vertical directions.</p>
<p>The x-direction and y-direction kernels would be:</p>
<p></p><div class="mathjax-exps">$$G_x = \begin{bmatrix} 1 & 0 & -1 \\ 2 & 0 & -2 \\ 1 & 0 & -1 \end{bmatrix} G_y = \begin{bmatrix} 1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix}$$</div><p></p>
<p>Each kernel is applied separately to obtain the gradient component in each orientation, <span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span>. Expressed as a formula, the gradient magnitude is:<br>
</p><div class="mathjax-exps">$$|G| = \sqrt{G^2_x + G^2_y}$$</div><p></p>
<p>The direction <span class="mathjax-exps">$\theta$</span> of the gradient is calculated as follows:<br>
</p><div class="mathjax-exps">$$\theta(x,y)=tan^{-1}(\frac{G_y}{G_x})$$</div><p></p>
<p>If the two formulas above confuse you, read on as we unpack these ideas one at a time.</p>
<h3 class="mume-header" id="intuition-discrete-derivative">Intuition: Discrete Derivative</h3>
<p>In the computer vision literature, you'll often hear about "taking the derivative", and this may serve as a source of confusion for beginning practitioners, since derivatives are usually thought of in the context of a continuous function. An image is a 2D matrix of discrete values, so how do we wrap our heads around the idea of finding a derivative?</p>
<p>But why do we even bother with derivatives when this course is supposed to be about edge detection in images?</p>
<p><img src="assets/derivatives.png" alt></p>
<p>Among the many ways to answer the question, my favorite is that an image is really just a function. When we treat an image as a function, the utility of taking derivatives becomes a little more obvious. In the image below, suppose you want to count the number of windows in this area of Venezia Sestiere Cannaregio; your program can look for large derivatives, since there are sharp changes in pixel intensity from the windows to the surrounding wall:</p>
<p><img src="assets/surface.png" alt></p>
<p>The code to generate the surface plot above is in <code>img2surface.py</code>.</p>
<p>Let's go back to the x-direction kernel in the Sobel Operator.<br>
This kernel has a column of zeros in the middle, which makes it quite easy to build intuition for. Essentially, for each pixel in our image, we want to compute its derivative in the x-direction by approximating a formula that you may have come across in your calculus class:</p>
<p></p><div class="mathjax-exps">$$f'(x) = \lim_{h\to0}\frac{f(x+h)-f(x)}{h}$$</div><p></p>
<p>This approximation is also called 'forward difference', because we're taking a value of <span class="mathjax-exps">$x$</span>, and computing the difference in <span class="mathjax-exps">$f(x)$</span> as we increment it by a small amount forward, denoted as <span class="mathjax-exps">$h$</span>.</p>
<p>And as it turns out, using the 'central difference' to compute the derivative of our discrete signal can deliver better results<sup class="footnote-ref"><a href="#fn2" id="fnref2">[2]</a></sup>:</p>
<p></p><div class="mathjax-exps">$$f'(x) = \lim_{h\to0}\frac{f(x+0.5h)-f(x-0.5h)}{h}$$</div><p></p>
<p>To make this more concrete, we can plug the formula into an actual array of pixels:</p>
<p></p><div class="mathjax-exps">$$[0, 255, 65, \underline{180}, 255, 255, 255]$$</div><p></p>
<p>When we set <span class="mathjax-exps">$h=2$</span> at the center pixel (the underlined value, 180), we have the following:</p>
<p></p><div class="mathjax-exps">$$\begin{aligned} f'(x) & = \lim_{h\to0}\frac{f(x+0.5h)-f(x-0.5h)}{h}\\ & = \frac{f(x+1)-f(x-1)}{2} \\ & = \frac{255-65}{2} \\ & = 95 \end{aligned}$$</div><p></p>
<p>Notice that a large part of the calculation we just performed -- everything except the final division by <span class="mathjax-exps">$h$</span> -- is equivalent to a 1D convolution operation using a <span class="mathjax-exps">$\begin{bmatrix} -1 & 0 & 1 \end{bmatrix}$</span> kernel.</p>
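<p>As a quick sanity check, we can reproduce the hand calculation above with a 1D convolution in <code>numpy</code>. This is a minimal sketch; note that <code>np.convolve</code> flips the kernel, so passing <code>[1, 0, -1]</code> correlates the signal with <code>[-1, 0, 1]</code>:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">import numpy as np

pixels = np.array([0, 255, 65, 180, 255, 255, 255], dtype=float)
# convolve with the flipped kernel, then divide by h = 2
diff = np.convolve(pixels, [1, 0, -1], mode="same") / 2
print(diff[3])  # 95.0 -- matches the hand calculation above
print(diff[5])  # 0.0  -- no response in the flat white region
</pre>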
<p>When the same 1x3 kernel <span class="mathjax-exps">$\begin{bmatrix} -1 & 0 & 1 \end{bmatrix}$</span> is applied to the right-most part of the image, where it's just white space ([..., 255, 255, 255]), the kernel evaluates to 0. In other words, our derivative filter returns no response where there is no sharp change in pixel intensity.</p>
<p>As a reminder, the x-direction kernel in our Sobel Operator is the following:<br>
</p><div class="mathjax-exps">$$G_x = \begin{bmatrix} 1 & 0 & -1 \\ 2 & 0 & -2 \\ 1 & 0 & -1 \end{bmatrix}$$</div><p></p>
<p>This takes our 1x3 kernel and, instead of convolving one row of pixels at a time, extends it to convolve a 3x3 neighborhood at a time using a weighted-average approach.</p>
<h3 class="mume-header" id="code-illustrations-sobel-operator">Code Illustrations: Sobel Operator</h3>
<p>The two kernels (one for horizontal and another for vertical edge detection) can be constructed, respectively, like the following:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">sobel_x <span class="token operator">=</span> np<span class="token punctuation">.</span>array<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
sobel_y <span class="token operator">=</span> np<span class="token punctuation">.</span>array<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">[</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">2</span><span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<p>You may have guessed that, given its role in digital image processing, <code>opencv</code> would include a method that performs the Sobel Operator for us, and thankfully it does. Here's an example of using the <code>cv2.Sobel(src, ddepth, dx, dy, dst=None, ksize)</code> method:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">gradient_x <span class="token operator">=</span> cv2<span class="token punctuation">.</span>Sobel<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>CV_64F<span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> ksize<span class="token operator">=</span><span class="token number">3</span><span class="token punctuation">)</span>
gradient_y <span class="token operator">=</span> cv2<span class="token punctuation">.</span>Sobel<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>CV_64F<span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> ksize<span class="token operator">=</span><span class="token number">3</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Range: </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">min</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string"> | </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
<span class="token comment"># Range: -177.0 | 204.0</span>
gradient_x <span class="token operator">=</span> np<span class="token punctuation">.</span>uint8<span class="token punctuation">(</span>np<span class="token punctuation">.</span>absolute<span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">)</span>
gradient_y <span class="token operator">=</span> np<span class="token punctuation">.</span>uint8<span class="token punctuation">(</span>np<span class="token punctuation">.</span>absolute<span class="token punctuation">(</span>gradient_y<span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Range uint8: </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">min</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string"> | </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
<span class="token comment"># Range uint8: 0 | 204</span>
cv2<span class="token punctuation">.</span>imshow<span class="token punctuation">(</span><span class="token string">"Gradient X"</span><span class="token punctuation">,</span> gradient_x<span class="token punctuation">)</span>
cv2<span class="token punctuation">.</span>imshow<span class="token punctuation">(</span><span class="token string">"Gradient Y"</span><span class="token punctuation">,</span> gradient_y<span class="token punctuation">)</span>
</pre><p><img src="assets/sudokudemo.png" alt></p>
<p>The code above, extracted from <code>sobel_01.py</code>, reinforces a couple of ideas that we've been working on. It shows that:</p>
<ul>
<li>the image gradients <span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span> are computed separately through convolution with two different Sobel kernels</li>
<li><span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span> responded to the change in pixel values along the x-direction and y-direction respectively, as visualized in the illustration above</li>
<li>convolution using the two Sobel filters may, and often will, produce values outside the range of 0 to 255. Given the presence of [-1, -2, -1] on one side of our kernel, the output can mathematically be as low as -1020. To store the values from these convolutions we use a 64-bit floating point type (<code>cv2.CV_64F</code>). OpenCV suggests we "keep the output datatype to some higher form such as <code>cv2.CV_64F</code>, take its absolute value and then convert back to <code>cv2.CV_8U</code>.<sup class="footnote-ref"><a href="#fn3" id="fnref3">[3]</a></sup>"</li>
</ul>
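<p>To visualize both directions in a single edge map, one option (a sketch, not part of <code>sobel_01.py</code>; it reuses the <code>uint8</code> gradients computed above) is to blend the two absolute gradients:</p>
<pre data-role="codeBlock" data-info="py" class="language-python"># Weighted blend of the two (already absolute, uint8) gradient images
combined = cv2.addWeighted(gradient_x, 0.5, gradient_y, 0.5, 0)
cv2.imshow("Combined Gradients", combined)
cv2.waitKey(0)
</pre>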
<p>While the code above certainly works, OpenCV also has a method that scales, calculates absolute values and converts the result to 8-bit. <code>cv2.convertScaleAbs(src, dst, alpha=1, beta=0)</code> performs the following:<br>
</p><div class="mathjax-exps">$$dst(I) = cast<uchar>(|src(I) * \alpha + \beta|)$$</div><p></p>
<pre data-role="codeBlock" data-info="py" class="language-python">gradient_x <span class="token operator">=</span> cv2<span class="token punctuation">.</span>Sobel<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>CV_64F<span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> ksize<span class="token operator">=</span><span class="token number">3</span><span class="token punctuation">)</span>
gradient_y <span class="token operator">=</span> cv2<span class="token punctuation">.</span>Sobel<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>CV_64F<span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> ksize<span class="token operator">=</span><span class="token number">3</span><span class="token punctuation">)</span>
gradient_x <span class="token operator">=</span> cv2<span class="token punctuation">.</span>convertScaleAbs<span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span>
gradient_y <span class="token operator">=</span> cv2<span class="token punctuation">.</span>convertScaleAbs<span class="token punctuation">(</span>gradient_y<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Range: </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">min</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string"> | </span><span class="token interpolation"><span class="token punctuation">{</span>np<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span>gradient_x<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
</pre><h3 class="mume-header" id="dive-deeper-gradient-orientation-magnitude">Dive Deeper: Gradient Orientation & Magnitude</h3>
<p>At the beginning of this course I said that images are really just 2D functions, before showing you the intricacies of our Sobel kernels. We saw the clever design of both the x- and y-direction kernels, which borrow from the concept of "taking the derivative" you often see in calculus textbooks.</p>
<p>But on a really basic level, these kernels only return the x and y edge responses. These are <strong>not the image gradient</strong>, just pure arithmetic values from following the convolution process. To get to the final form (where the edges in our image are emphasized) we still need to compute the gradient direction and magnitude for each point in our image.</p>
<p>This brings us back to our original formula. Recall that the x-direction and y-direction kernels are:</p>
<p></p><div class="mathjax-exps">$$G_x = \begin{bmatrix} 1 & 0 & -1 \\ 2 & 0 & -2 \\ 1 & 0 & -1 \end{bmatrix} G_y = \begin{bmatrix} 1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix}$$</div><p></p>
<p>We understand that each kernel is applied separately to obtain the gradient component in each orientation, <span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span>. What is the significance of this? Well, as it turns out, if we know the change in the x-direction and the corresponding change in the y-direction, then we can use the Pythagorean theorem to approximate the "length of the slope", a concept that many of you are familiar with.</p>
<p>Expressed as a formula, the gradient magnitude is hence:<br>
</p><div class="mathjax-exps">$$|G| = \sqrt{G^2_x + G^2_y}$$</div><p></p>
<p>Along with the Pythagorean theorem, some of you may also have some familiarity with the three trigonometric functions. In particular, in a right triangle the <strong>tangent of an angle is the length of the opposite side divided by the length of the adjacent side</strong>.</p>
<p>This leads us to the following expression:<br>
</p><div class="mathjax-exps">$$tan(\theta_{(x,y)})=\frac{G_y}{G_x}$$</div><p></p>
<p>Rewriting the expression above, we arrive at the formula for the gradient's direction:<br>
</p><div class="mathjax-exps">$$\theta_{(x,y)}=tan^{-1}(\frac{G_y}{G_x})$$</div><p></p>
<p><img src="assets/2dfuncs.png" alt></p>
<p>This whole idea is also illustrated in code, and the script is provided to you:</p>
<ul>
<li><code>gradient.py</code> to generate the vector field in the picture above (right)</li>
<li><code>img2surface.py</code> on the penguin image in the <code>assets</code> folder generates the surface plot</li>
</ul>
<p>Succinctly, suppose the two 3x3 kernels do not fire a response (for example when no edges are detected in the white background of our penguin): both <span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span> will be 0, which leads to a gradient magnitude of 0. You can compute these by hand, let OpenCV's implementation handle that for you, or use <code>numpy</code> as illustrated in <code>gradient.py</code>:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">dY<span class="token punctuation">,</span> dX <span class="token operator">=</span> np<span class="token punctuation">.</span>gradient<span class="token punctuation">(</span>img<span class="token punctuation">)</span>
</pre><h1 class="mume-header" id="image-segmentation">Image Segmentation</h1>
<p>Image segmentation is the process of decomposing an image into parts for further analysis. This has many uses:</p>
<ul>
<li>Background subtraction in human motion analysis</li>
<li>Multi-object classification</li>
<li>Find region of interest for OCR (optical character recognition)</li>
<li>Count pedestrians from a streamed video source</li>
<li>Isolating vehicle registration plates (license plate) and vehicle models from a busy highway scene</li>
</ul>
<p>Current literature on image segmentation techniques can be classified into<sup class="footnote-ref"><a href="#fn4" id="fnref4">[4]</a></sup>:</p>
<ul>
<li>Intensity-based segmentation</li>
<li>Edge-based segmentation</li>
<li>Region-based semantic segmentation</li>
</ul>
<p>It's important to note, however, that the rise in popularity of deep learning frameworks and techniques has ushered in a proliferation of new methods for what was once a highly difficult task. In future lectures, we'll explore image segmentation in far greater detail. In this course, we'll study intensity-based and edge-based segmentation methods.</p>
<h2 class="mume-header" id="intensity-based-segmentation">Intensity-based Segmentation</h2>
<p>The intensity-based method is perhaps the simplest, as intensity is the simplest property that pixels can share.</p>
<p>To make a more concrete case of this, let's assume you're working with a team of researchers to build an AI-based "sudoku solver" that, unimaginatively, will compete against human sudoku players in an attempt to further stake the claim in an ongoing debate of AI superiority.</p>
<p>While your teammates work on the algorithmic design for the actual solver, your task is comparatively straightforward: write a script to scan newspaper images (or print magazines) and binarize them, discarding everything except the digits in the sudoku puzzle.</p>
<p>This presents a great opportunity to use an intensity-based segmentation technique we spoke about earlier.</p>
<p>In <code>intensitythresholding_01.py</code>, you'll find a code demonstration of the numerous thresholding methods provided by OpenCV. In total, there are 5 simple thresholding methods: <code>THRESH_BINARY</code>, <code>THRESH_BINARY_INV</code>, <code>THRESH_TRUNC</code>, <code>THRESH_TOZERO</code> and <code>THRESH_TOZERO_INV</code><sup class="footnote-ref"><a href="#fn5" id="fnref5">[5]</a></sup>.</p>
<h3 class="mume-header" id="simple-thresholding">Simple Thresholding</h3>
<p>The method call for all of them is identical:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">cv2<span class="token punctuation">.</span>threshold<span class="token punctuation">(</span>img<span class="token punctuation">,</span> thresh<span class="token punctuation">,</span> maxval<span class="token punctuation">,</span> <span class="token builtin">type</span><span class="token punctuation">)</span>
</pre><p>We specify our source image <code>img</code> (usually in grayscale), a threshold value <code>thresh</code> used to binarize the image pixels, and a max value <code>maxval</code> to assign to any pixel that crosses our threshold.</p>
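<p>A minimal sketch of a single call (the filename and the threshold value of 127 are just illustrative choices; <code>cv2.threshold</code> returns the threshold used along with the result):</p>
<pre data-role="codeBlock" data-info="py" class="language-python">import cv2

img = cv2.imread("sudoku.png", cv2.IMREAD_GRAYSCALE)  # hypothetical filename
# THRESH_BINARY: pixels above 127 become 255, everything else becomes 0
retval, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
</pre>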
<p>The mathematical functions for each one of them:<br>
<img src="assets/threshmethods.png" alt></p>
<p>They're collectively known as <strong>simple thresholding</strong> in OpenCV because they use a single global threshold value; with <code>THRESH_BINARY</code>, any pixel smaller than the threshold is set to 0, and otherwise it is set to <code>maxval</code>.</p>
<p>These probably sound too simplistic for anything beyond the simplest of real-world images, and for the majority of cases they are. They call for proper judgment of the task at hand.</p>
<p>Applying the various simple thresholding methods to our sudoku image, we observe that the digits are for the most part extracted successfully while the background information is greatly reduced:</p>
<p><img src="assets/sudoku_simple.png" alt></p>
<p>Refer to <code>intensitythresholding_01.py</code> for the full code.</p>
<p>As a simple homework, practice <strong>simple thresholding</strong> on <code>car2.png</code>, located in your <code>homework</code> folder. To reduce noise, you may have to apply a blurring operation prior to thresholding. As you practice, pay attention to the interaction between your threshold values and the output. Later in the course, you'll learn how to draw contours, which will come in handy in producing the final output:</p>
<p><img src="assets/cars_hw.png" alt></p>
<p>As you work on your homework, you will notice that, given the varying lighting conditions across different regions of our image, any global value we pick will be too low for some regions and too high for others.</p>
<h3 class="mume-header" id="adaptive-thresholding">Adaptive Thresholding</h3>
<p>Using a global value as an intensity threshold may work in particular cases but may be overly naive to perform well when, say, an image has different lighting conditions in different areas. A great example of this case is the object extraction exercise you performed using <code>car2.png</code>.</p>
<p>Adaptive thresholding is not a lot different from the aforementioned thresholding techniques, except it determines the threshold for each pixel based on its neighborhood. This in effect means that different regions of the image are assigned different thresholds, leading to a cleaner output when our image has varying degrees of illumination.</p>
<p><img src="assets/cars_adaptive.png" alt></p>
<p>The method is called with the source image (<code>src</code>), a max value (<code>maxValue</code>), the method (<code>adaptiveMethod</code>), a threshold type (<code>thresholdType</code>), the size of the neighborhood (<code>blockSize</code>) and a constant (<code>C</code>) that is subtracted from the mean or the weighted sum of the neighborhood pixels.</p>
<pre data-role="codeBlock" data-info="py" class="language-python">mean_adaptive <span class="token operator">=</span> cv2<span class="token punctuation">.</span>adaptiveThreshold<span class="token punctuation">(</span>
img<span class="token punctuation">,</span> <span class="token number">255</span><span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>ADAPTIVE_THRESH_MEAN_C<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>THRESH_BINARY<span class="token punctuation">,</span> <span class="token number">11</span><span class="token punctuation">,</span> <span class="token number">2</span>
<span class="token punctuation">)</span>
gaussian_adaptive <span class="token operator">=</span> cv2<span class="token punctuation">.</span>adaptiveThreshold<span class="token punctuation">(</span>
img<span class="token punctuation">,</span> <span class="token number">255</span><span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>ADAPTIVE_THRESH_GAUSSIAN_C<span class="token punctuation">,</span> cv2<span class="token punctuation">.</span>THRESH_BINARY<span class="token punctuation">,</span> <span class="token number">11</span><span class="token punctuation">,</span> <span class="token number">2</span>
<span class="token punctuation">)</span>
</pre><p>The code, taken from <code>adaptivethresholding_01.py</code>, produces the following:<br>
<img src="assets/sudoku_binary.png" alt></p>
<h2 class="mume-header" id="edge-based-contour-estimation">Edge-based contour estimation</h2>
<p>Edge-based segmentation separates foreground objects by first identifying all edges in our image. The Sobel Operator and other gradient-based filter functions are good and well-known candidates for such an operation.<sup class="footnote-ref"><a href="#fn6" id="fnref6">[6]</a></sup></p>
<p>Once we obtain the edges, we perform the contour approximation operation using the <code>findContours</code> method in OpenCV. But what exactly are contours?</p>
<p>In OpenCV's words<sup class="footnote-ref"><a href="#fn7" id="fnref7">[7]</a></sup>,</p>
<blockquote>
<p>Contours can be explained simply as a curve joining all the continuous points (along the boundary), having same color or intensity. The contours are a useful tool for shape analysis and object detection and recognition.</p>
</blockquote>
<p>If we have "a curve joining all the continuous points along the boundary", then we are able to extract this object. If we wish to count the number of contours in our image, the method also convenient return a list of all the found contours, making it easy to perform <code>len()</code> on the list to retrieve the final value.</p>
<p>There are three arguments to the <code>findContours()</code> function: the first is the source image, the second is the contour retrieval mode, and the last is the contour approximation method. Both the retrieval mode and the approximation method are discussed in the next sub-section.</p>
<pre data-role="codeBlock" data-info="py" class="language-python"><span class="token punctuation">(</span>cnts<span class="token punctuation">,</span> hierarchy<span class="token punctuation">)</span> <span class="token operator">=</span> cv2<span class="token punctuation">.</span>findContours<span class="token punctuation">(</span>
img<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>RETR_EXTERNAL<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>CHAIN_APPROX_SIMPLE<span class="token punctuation">,</span>
<span class="token punctuation">)</span>
</pre><p>The function returns the contours and the hierarchy, with contours being a list of all the contours in the image. Each contour is a Numpy array of <code>(x,y)</code> coordinates of boundary points of the object, giving each contour a shape of <code>(n, 1, 2)</code> for <code>n</code> boundary points.</p>
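<p>Counting the detected objects is then a one-liner (a quick sketch using the variables above):</p>
<pre data-role="codeBlock" data-info="py" class="language-python">print(f"Number of contours found: {len(cnts)}")
print(cnts[0].shape)  # (n, 1, 2): n boundary points, each an (x, y) pair
</pre>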
<p>What this allows us to do is combine the contours we retrieved with the <code>cv2.drawContours()</code> function: drawing them individually, iterating exhaustively in a for loop, or drawing everything in one go.</p>
<p>Assuming <code>img</code> being the image we want to draw our contours on, the following code demonstrates these different methods:</p>
<pre data-role="codeBlock" data-info="py" class="language-python"><span class="token comment"># draw all contours</span>
cv2<span class="token punctuation">.</span>drawContours<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cnts<span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">255</span><span class="token punctuation">,</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">)</span>
<span class="token comment"># draw the 3rd contour</span>
cv2<span class="token punctuation">.</span>drawContours<span class="token punctuation">(</span>img<span class="token punctuation">,</span> cnts<span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">255</span><span class="token punctuation">,</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">)</span>
<span class="token comment"># draw the first, fourth and fifth contour</span>
cnt_selected <span class="token operator">=</span> <span class="token punctuation">[</span>cnts<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span> cnts<span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">,</span> cnts<span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">]</span>
cv2<span class="token punctuation">.</span>drawContours<span class="token punctuation">(</span>canvas<span class="token punctuation">,</span> cnt_selected<span class="token punctuation">,</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">255</span><span class="token punctuation">,</span> <span class="token number">255</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span>
<span class="token comment"># draw the fourth contour</span>
cv2<span class="token punctuation">.</span>drawContours<span class="token punctuation">(</span>img<span class="token punctuation">,</span> contours<span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">255</span><span class="token punctuation">,</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">)</span>
</pre><p>The first argument to this function is the source image, the second is the contours as a Python list, the third is the index of the contour to draw (passing -1 draws all of them), and the remaining arguments are the color and thickness of the contour lines respectively.</p>
<p>One common problem beginners run into is performing the <code>findContours</code> operation on a grayscale image instead of a binary image, leading to poorer accuracy.</p>
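<p>A minimal sketch of the recommended order of operations (the filename and the threshold value are placeholders):</p>
<pre data-role="codeBlock" data-info="py" class="language-python">import cv2

img = cv2.imread("penguins.png")  # hypothetical input image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# binarize first: findContours expects a binary image, not a grayscale one
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
cnts, hierarchy = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(img, cnts, -1, (0, 255, 0), 3)  # draw all contours in green
</pre>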
<p>When we execute <code>contour_01.py</code>, we notice that the <code>drawContours</code> operation yields the following output:</p>
<p><img src="assets/handholding.png" alt></p>
<p>There are 5 occurrences where our <code>findContours</code> function merged two penguins into a single contour because they were too close to each other. When we execute <code>len(cnts)</code>, we find that the returned value is 5 less than the actual count.</p>
<p>Try to fix <code>contour_01.py</code> by performing the contour approximation on a binary image, using the thresholding techniques you've learned in the previous section.</p>
<h3 class="mume-header" id="contour-retrieval-and-approximation">Contour Retrieval and Approximation</h3>
<p>In the <code>findContours()</code> function call, we passed our image as <code>src</code> in the first argument. The second argument is the contour retrieval mode, and four modes are documented<sup class="footnote-ref"><a href="#fn8" id="fnref8">[8]</a></sup>:</p>
<ul>
<li><code>RETR_EXTERNAL</code>: retrieves only the extreme outer contours (see image below for reference)</li>
<li><code>RETR_LIST</code>: retrieves all contours without establishing any hierarchical relationships</li>
<li><code>RETR_CCOMP</code>: retrieves all contours and organizes them into a two-level hierarchy (external boundary + boundaries of the holes)</li>
<li><code>RETR_TREE</code>: retrieves all of the contours and reconstructs a full hierarchy of nested contours</li>
</ul>
<p><img src="assets/outervsall.png" alt></p>
<p>In our case, we don't particularly care about the hierarchy, and so the second to fourth modes all have the same effect. In other cases, you may experiment with a different contour retrieval mode to obtain both the contours and the hierarchy for further processing.</p>
<p>What about the last parameter passed to our <code>findContours</code> method?</p>
<p>Recall that contours are just the boundaries of a shape. In a sense, a contour is an array of <code>(x,y)</code> coordinates used to "record" the boundary of a shape. Given this collection of coordinates, we can then recreate the boundary of our shape. This raises the next question: how many sets of coordinates do we need to store to recreate our boundary?</p>
<p>Suppose we perform the <code>findContours</code> operation on an image of two rectangles. One way to achieve this is to store as many points around these rectangles as possible. When we set <code>cv2.CHAIN_APPROX_NONE</code>, that is in fact what the algorithm does, resulting in 658 points around the border of the top rectangle:<br>
<img src="homework/equal.png" alt></p>
<p>However, notice that the more efficient solution would be to store only the 4 corner coordinates of each rectangle. The contour is perfectly represented and can be recreated using just 4 points per rectangle, for a total of 8 points compared to 1,316. <code>cv2.CHAIN_APPROX_SIMPLE</code><sup class="footnote-ref"><a href="#fn9" id="fnref9">[9]</a></sup> is an implementation of this, and you can find the sample code below:</p>
<p><img src="assets/approx.png" alt></p>
<pre data-role="codeBlock" data-info="py" class="language-python">cnts<span class="token punctuation">,</span> _ <span class="token operator">=</span> cv2<span class="token punctuation">.</span>findContours<span class="token punctuation">(</span>
<span class="token comment"># does this need to be changed?</span>
edged<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>RETR_EXTERNAL<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>CHAIN_APPROX_SIMPLE<span class="token punctuation">,</span>
<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Cnts Simple Shape (1): </span><span class="token interpolation"><span class="token punctuation">{</span>cnts<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>shape<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
<span class="token comment"># return: Cnts Simple Shape (1): (4, 1, 2)</span>
<span class="token comment"># output of cnts[0]:</span>
<span class="token comment"># array([[[ 47, 179]],</span>
<span class="token comment"># [[ 47, 259]],</span>
<span class="token comment"># [[296, 259]],</span>
<span class="token comment"># [[296, 179]]], dtype=int32)</span>
cnts2<span class="token punctuation">,</span> _ <span class="token operator">=</span> cv2<span class="token punctuation">.</span>findContours<span class="token punctuation">(</span>
<span class="token comment"># does this need to be changed?</span>
edged<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>RETR_EXTERNAL<span class="token punctuation">,</span>
cv2<span class="token punctuation">.</span>CHAIN_APPROX_NONE<span class="token punctuation">,</span>
<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Cnts NoApprox Shape:</span><span class="token interpolation"><span class="token punctuation">{</span>cnts2<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>shape<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
<span class="token comment"># Cnts NoApprox Shape:(658, 1, 2)</span>
</pre><p>The full script for the experiment above is in <code>contourapprox.py</code>.</p>
<p>You may, at this point, hop to the Learn By Building section to attempt your homework.</p>
<h1 class="mume-header" id="canny-edge-detector">Canny Edge Detector</h1>
<p>John Canny developed a multi-stage procedure that, some 30 years later, is "still a state-of-the-art edge detector"<sup class="footnote-ref"><a href="#fn10" id="fnref10">[10]</a></sup>. More accurate edge detection algorithms usually require greater computational resources, and consequently longer processing times, or a greater number of parameters, in a field where speed is often the most important criterion. For these reasons, along with its general robustness, the Canny edge detector has become one of the "most important methods to find edges" even in modern literature<sup class="footnote-ref"><a href="#fn1" id="fnref1:2">[1:2]</a></sup>.</p>
<p>I call it a multi-stage procedure because the technique, as described in his original paper, <em>A Computational Approach to Edge Detection</em>, works as follows<sup class="footnote-ref"><a href="#fn11" id="fnref11">[11]</a></sup>:</p>
<ol>
<li>Gaussian smoothing
<ul>
<li>Noise reduction using a 5x5 Gaussian filter</li>
</ul>
</li>
<li>Compute gradient magnitudes and angles</li>
<li>Apply non-maximum suppression (NMS)
<ul>
<li>Suppress close-by edges that are non-maximal, leaving only local maxima as edges</li>
</ul>
</li>
<li>Track edges by hysteresis
<ul>
<li>Suppress all remaining weak edges that are not connected to strong edges, and link the edges that survive</li>
</ul>
</li>
</ol>
<p>Steps (1) and (2) in the procedure above can be achieved using code we've written so far in our Sobel Operator scripts. We use the Sobel mask filters to compute <span class="mathjax-exps">$G_x$</span> and <span class="mathjax-exps">$G_y$</span>, the gradient components in the horizontal and vertical orientations respectively. We then compute the gradient magnitude and the angle <span class="mathjax-exps">$\theta$</span>:</p>
<p>Gradient magnitude:<br>
</p><div class="mathjax-exps">$$|G| = \sqrt{G^2_x + G^2_y}$$</div><p></p>
<p>And recall that the orientation <span class="mathjax-exps">$\theta$</span> of the gradient is calculated as follows:<br>
</p><div class="mathjax-exps">$$\theta(x,y)=\tan^{-1}\left(\frac{G_y}{G_x}\right)$$</div><p></p>
<h2 class="mume-header" id="edge-thinning">Edge Thinning</h2>
<p>Step (3) in the procedure uses another common technique in computer vision known as non-maximum suppression (NMS). Let's begin by taking a look at the output of our Sobel edge detector from earlier exercises:<br>
<img src="assets/sobeledges.png" alt></p>
<p>Notice that as we zoom in on the output image, the gradient-based method did produce our strong edges, but it also produced the "weak" edges it found in the image. Because it is not a parameterized function (the edges are computed directly from the gradient magnitude and direction), we have to rely on an additional mechanism for the edge thinning operation, the criterion being a single accurate response to any given edge<sup class="footnote-ref"><a href="#fn12" id="fnref12">[12]</a></sup>.</p>
<p>Non-maximum suppression helps us obtain the strongest edges by suppressing all gradient values, i.e. setting them to 0, except for the local maxima, which indicate the locations with the sharpest change of intensity. In the words of <code>OpenCV</code>:</p>
<blockquote>
<p>After getting gradient magnitude and direction, a full scan of image is done to remove any unwanted pixels which may not constitute the edge. For this, at every pixel, pixel is checked if it is a local maximum in its neighborhood in the direction of gradient. If point A is on the edge, and point B and C are in gradient directions, point A is checked with point B and C to see if it forms a local maximum. If so, it is considered for next stage, otherwise, it is suppressed (put to zero).</p>
</blockquote>
<p>The output of step (3) is a binary image with thin edges.</p>
<p>The example code<sup class="footnote-ref"><a href="#fn13" id="fnref13">[13]</a></sup> demonstrates how you might implement such an NMS for the purpose of Canny edge detection.</p>
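<p>To make step (3) concrete, here is a minimal NMS sketch, assuming <code>magnitude</code> and <code>theta</code> arrays computed as in the gradient snippet earlier; quantizing the angle into four directions is a common textbook simplification rather than the only possible choice:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">import numpy as np

def non_max_suppression(magnitude, angle):
    """Keep only pixels that are local maxima along the gradient direction.

    magnitude: 2-D array of gradient magnitudes
    angle: 2-D array of gradient angles, in degrees
    """
    h, w = magnitude.shape
    out = np.zeros_like(magnitude)
    # neighbour offsets for each quantized direction:
    # 0 deg (E/W), 45 deg (NE/SW), 90 deg (N/S), 135 deg (NW/SE)
    offsets = [((0, 1), (0, -1)),
               ((-1, 1), (1, -1)),
               ((-1, 0), (1, 0)),
               ((-1, -1), (1, 1))]
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            # quantize the angle to the nearest of the four directions
            d = int(round(np.mod(angle[i, j], 180.0) / 45.0)) % 4
            (di1, dj1), (di2, dj2) = offsets[d]
            n1 = magnitude[i + di1, j + dj1]
            n2 = magnitude[i + di2, j + dj2]
            # suppress the pixel unless it dominates both neighbours
            if magnitude[i, j] >= n1 and magnitude[i, j] >= n2:
                out[i, j] = magnitude[i, j]
    return out
</pre>
<p>Running the Sobel magnitude map through this function should leave ridges that are only one pixel wide, which is the "thin edges" output described above.</p>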
<h2 class="mume-header" id="hysterisis-thresholding">Hysterisis Thresholding</h2>
<p>The final step of this multi-stage algorithm decides which of the remaining edges are really edges and which are not. It accomplishes this using two threshold values, specified when we call the <code>cv2.Canny()</code> function:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">canny <span class="token operator">=</span> cv2<span class="token punctuation">.</span>Canny<span class="token punctuation">(</span>img<span class="token punctuation">,</span> threshold1<span class="token operator">=</span><span class="token number">50</span><span class="token punctuation">,</span> threshold2<span class="token operator">=</span><span class="token number">180</span><span class="token punctuation">)</span>
</pre><p>Any edge with an intensity gradient above <code>threshold2</code> is considered a sure edge, and any edge below <code>threshold1</code> is considered a non-edge and is suppressed.</p>
<p>The edges that lie between these two values (in our code above, edges with an intensity gradient between 50 and 180) are classified as edges <strong>if they are connected to sure-edge pixels</strong> (the ones above 180); otherwise they are also discarded.</p>
<p>This stage also removes small pixel noise, on the assumption that true edges are long, connected lines.</p>
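<p>As an illustrative sketch of this connectivity rule (not OpenCV's internal implementation), the snippet below keeps the sure edges and then repeatedly dilates them into connected weak pixels until the edge map stops changing:</p>
<pre data-role="codeBlock" data-info="py" class="language-python">import cv2
import numpy as np

def hysteresis_threshold(magnitude, low, high):
    """Keep strong edges, plus weak edges connected to a strong edge."""
    strong = (magnitude >= high).astype(np.uint8)
    candidate = (magnitude >= low).astype(np.uint8)  # strong and weak pixels

    # grow the strong edges into neighbouring candidate pixels
    # until nothing changes
    kernel = np.ones((3, 3), np.uint8)
    edges = strong.copy()
    while True:
        grown = cv2.dilate(edges, kernel) & candidate
        if np.array_equal(grown, edges):
            break
        edges = grown
    return edges * 255  # binary edge map
</pre>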
<p>The full procedure is implemented in a single function, <code>cv2.Canny()</code>, whose first three parameters are required: the input image, the first threshold, and the second threshold. <code>canny_01.py</code> implements this and compares it to the Sobel edge detector we developed earlier:</p>
<p><img src="assets/sobelvscanny.png" alt></p>
<h2 class="mume-header" id="learn-by-building">Learn By Building</h2>
<p>In the <code>homework</code> directory, you'll find a picture of scattered lego bricks, <code>lego.jpg</code>. Exactly the kind of stuff you don't want on your bedroom floor, as anyone living with kids will testify.</p>
<p>Your job is to combine what you've learned in this lesson with what you've learned from the class on kernel convolutions and edge detection (<code>kernel.md</code>) to build a lego brick counter.</p>
<p>Note that there are many ways you can build an edge detector. Given what you've learned so far, there are at least 3 equally adequate routines you can apply to this particular problem set.</p>
<p>For the sake of this exercise, your script should feature a Sobel Operator (or a similar gradient-based edge detection method), since this is the main topic of this chapter.</p>
<p><img src="homework/lego.jpg" alt></p>
<h2 class="mume-header" id="references">References</h2>
<hr class="footnotes-sep">
<section class="footnotes">
<ol class="footnotes-list">
<li id="fn1" class="footnote-item"><p>S.Kaur, I.Singh, Comparison between Edge Detection Techniques, International Journal of Computer Applications, July 2016 <a href="#fnref1" class="footnote-backref">↩︎</a> <a href="#fnref1:1" class="footnote-backref">↩︎</a> <a href="#fnref1:2" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn2" class="footnote-item"><p>Carnegie Mellon University, Image Gradients and Gradient Filtering (16-385 Computer Vision) <a href="#fnref2" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn3" class="footnote-item"><p>Image Gradients, <a href="https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_gradients/py_gradients.html">OpenCV Documentation</a> <a href="#fnref3" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn4" class="footnote-item"><p>University of Victoria, Electrical and Computer Engineering, Computer Vision: Image Segmentation <a href="#fnref4" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn5" class="footnote-item"><p>Image Thresholding, <a href="https://docs.opencv.org/master/d7/d4d/tutorial_py_thresholding.html">OpenCV Documentation</a> <a href="#fnref5" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn6" class="footnote-item"><p>C.Leubner, A Framework for Segmentation and Contour Approximation in Computer-Vision Systems, 2002 <a href="#fnref6" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn7" class="footnote-item"><p>Contours: Getting Started, <a href="https://docs.opencv.org/trunk/d4/d73/tutorial_py_contours_begin.html">OpenCV Documentation</a> <a href="#fnref7" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn8" class="footnote-item"><p>Structural Analysis and Shape Descriptors, <a href="https://docs.opencv.org/master/d3/dc0/group__imgproc__shape.html#ga819779b9857cc2f8601e6526a3a5bc71">OpenCV Documentation</a> <a href="#fnref8" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn9" class="footnote-item"><p>Contours Hierarchy, <a href="https://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html">OpenCV Documentation</a> <a href="#fnref9" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn10" class="footnote-item"><p>Shapiro, L. G. and Stockman, G. C, Computer Vision, London etc, 2001 <a href="#fnref10" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn11" class="footnote-item"><p>Bastan, M., Bukhari, S., and Breuel, T., Active Canny: Edge Detection and Recovery with Open Active Contour Models, Technical University of Kaiserslautern, 2016 <a href="#fnref11" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn12" class="footnote-item"><p>Maini, R. and Aggarwal, H., Study and Comparison of various Image Edge Detection Techniques, Internal Jounral of Image Processing (IJIP) <a href="#fnref12" class="footnote-backref">↩︎</a></p>
</li>
<li id="fn13" class="footnote-item"><p><a href="https://github.com/onlyphantom/Canny-edge-detector/blob/master/nonmax_suppression.py">Example code for NMS, github</a> <a href="#fnref13" class="footnote-backref">↩︎</a></p>
</li>
</ol>
</section>
</div>
<div class="md-sidebar-toc"><ul>
<li><a href="#definition">Definition</a></li>
<li><a href="#gradient-based-edge-detection">Gradient-based Edge Detection</a>
<ul>
<li><a href="#sobel-operator">Sobel Operator</a>
<ul>
<li><a href="#intuition-discrete-derivative">Intuition: Discrete Derivative</a></li>
<li><a href="#code-illustrations-sobel-operator">Code Illustrations: Sobel Operator</a></li>
<li><a href="#dive-deeper-gradient-orientation-magnitude">Dive Deeper: Gradient Orientation & Magnitude</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#image-segmentation">Image Segmentation</a>
<ul>
<li><a href="#intensity-based-segmentation">Intensity-based Segmentation</a>
<ul>
<li><a href="#simple-thresholding">Simple Thresholding</a></li>
<li><a href="#adaptive-thresholding">Adaptive Thresholding</a></li>
</ul>
</li>
<li><a href="#edge-based-contour-estimation">Edge-based contour estimation</a>
<ul>
<li><a href="#contour-retrieval-and-approximation">Contour Retrieval and Approximation</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#canny-edge-detector">Canny Edge Detector</a>
<ul>
<li><a href="#edge-thinning">Edge Thinning</a></li>
<li><a href="#hysterisis-thresholding">Hysterisis Thresholding</a></li>
<li><a href="#learn-by-building">Learn By Building</a></li>
<li><a href="#references">References</a></li>
</ul>
</li>
</ul>
</div>
<a id="sidebar-toc-btn">≡</a>
<script>
var sidebarTOCBtn = document.getElementById('sidebar-toc-btn')
sidebarTOCBtn.addEventListener('click', function(event) {
event.stopPropagation()
if (document.body.hasAttribute('html-show-sidebar-toc')) {
document.body.removeAttribute('html-show-sidebar-toc')
} else {
document.body.setAttribute('html-show-sidebar-toc', true)
}
})
</script>
</body></html>