forked from xiahouzuoxin/notes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOpenCV基础篇之图像频域.html
256 lines (229 loc) · 14.9 KB
/
OpenCV基础篇之图像频域.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta name="generator" content="pandoc" />
<title>OpenCV基础篇之图像频域</title>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
code > span.kw { color: #007020; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #40a070; }
code > span.fl { color: #40a070; }
code > span.ch { color: #4070a0; }
code > span.st { color: #4070a0; }
code > span.co { color: #60a0b0; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #06287e; }
code > span.er { color: #ff0000; font-weight: bold; }
</style>
<link rel="stylesheet" href="../stylesheets/Github.css" type="text/css" />
<title>OpenCV基础篇之图像频域</title>
</head>
<body>
<div id="header"><center>
<p class="header_titleline">
<a href="../index.html" target="_self" title="主页">主页 </a><a href="../Search.html" target="_self" title="站内搜索">站内搜索 </a><a href="../Projects.html" target="_self" title="项目研究">项目研究 </a><a href="../Archives.html" target="_self" title="文章存档">文章存档 </a><a href="../README.html" target="_self" title="分类目录">分类目录 </a><a href="../AboutMe.html" target="_self" title="关于我">关于我 </a>
</p>
</center></div>
<h1>OpenCV基础篇之图像频域</h1>
<h4>2014-09-16 / xiahouzuoxin</h4>
<h4>Tags: OpenCV</h4>
转载请注明出处: <a href="http://xiahouzuoxin.github.io/notes/">http://xiahouzuoxin.github.io/notes/</a>
<div id="TOC">
<ul>
<li><a href="#程序及分析">程序及分析</a></li>
<li><a href="#效果">效果</a></li>
</ul>
</div>
<!---title:OpenCV基础篇之图像频域-->
<!---keywords:OpenCV-->
<!---date:2014-09-16-->
<h2 id="程序及分析">程序及分析</h2>
<pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
<span class="co"> * FileName : fft2.cpp</span>
<span class="co"> * Author : xiahouzuoxin @163.com</span>
<span class="co"> * Version : v1.0</span>
<span class="co"> * Date : Wed 30 Jul 2014 09:42:12 PM CST</span>
<span class="co"> * Brief : </span>
<span class="co"> * </span>
<span class="co"> * Copyright (C) MICL,USTB</span>
<span class="co"> */</span>
<span class="ot">#include <iostream></span>
<span class="ot">#include <cv.h></span>
<span class="ot">#include <highgui.h></span>
<span class="ot">#include "imgproc/imgproc.hpp"</span>
using namespace std;
using namespace cv;
// Entry point: loads the image named by argv[1] as grayscale, computes its 2-D DFT,
// and shows the log-magnitude spectrum before and after the quadrant swap.
// Returns -1 when the argument is missing or the image cannot be read, 0 otherwise.
<span class="dt">int</span> main(<span class="dt">int</span> argc, <span class="dt">char</span> *argv[])
{
<span class="kw">if</span> (argc < <span class="dv">2</span>) {
cout<<<span class="st">"Usage:./fft2 [image name]"</span><<endl;
<span class="kw">return</span> -<span class="dv">1</span>;
}
<span class="co">// Load the input as a single-channel grayscale image</span>
Mat image = imread(argv[<span class="dv">1</span>], CV_LOAD_IMAGE_GRAYSCALE);
<span class="kw">if</span> (!image.data) {
cout << <span class="st">"Read image error"</span><<endl;
<span class="kw">return</span> -<span class="dv">1</span>;
}
Mat padded;
<span class="dt">int</span> m = getOptimalDFTSize(image.rows); <span class="co">// Optimal DFT length for the row count: per the OpenCV docs a product of 2s, 3s and 5s, not necessarily a power of two</span>
<span class="dt">int</span> n = getOptimalDFTSize(image.cols);
<span class="co">// Zero-pad on the bottom and right up to m x n; the result is stored in padded</span>
copyMakeBorder(image, padded, <span class="dv">0</span>, m-image.rows, <span class="dv">0</span>, n-image.cols, BORDER_CONSTANT, Scalar::all(<span class="dv">0</span>));
<span class="co">// Two float planes: the real part (the padded image) and the imaginary part (all zeros)</span>
Mat planes[] = {Mat_<<span class="dt">float</span>>(padded), Mat::zeros(padded.size(), CV_32F) };
Mat complexI;
merge(planes, <span class="dv">2</span>, complexI);
dft(complexI, complexI);
<span class="co">// Split the result back into real/imaginary planes and store the magnitude in planes[0]</span>
split(complexI, planes);
magnitude(planes[<span class="dv">0</span>], planes[<span class="dv">0</span>], planes[<span class="dv">1</span>]);
Mat magI = planes[<span class="dv">0</span>];
<span class="co">// Switch to logarithmic scale => log(1+sqrt(Re(DFT(I))^2+Im(DFT(I))^2))</span>
magI += Scalar::all(<span class="dv">1</span>);
log(magI, magI);
<span class="co">// Crop the spectrum to an even width and height (the mask clears the lowest bit) so the four quadrants have equal size</span>
magI = magI(Rect(<span class="dv">0</span>, <span class="dv">0</span>, magI.cols & (-<span class="dv">2</span>), magI.rows & (-<span class="dv">2</span>)));
// Keep a separate copy of the unshifted spectrum, scaled to [0, 1], for display
Mat _magI = magI.clone();
normalize(_magI, _magI, <span class="dv">0</span>, <span class="dv">1</span>, CV_MINMAX);
<span class="co">// Rearrange the quadrants of the Fourier image so that the origin is at the image center</span>
<span class="dt">int</span> cx = magI.cols/<span class="dv">2</span>;
<span class="dt">int</span> cy = magI.rows/<span class="dv">2</span>;
Mat q0(magI, Rect(<span class="dv">0</span>,<span class="dv">0</span>,cx,cy)); <span class="co">// Top-Left</span>
Mat q1(magI, Rect(cx,<span class="dv">0</span>,cx,cy)); <span class="co">// Top-Right</span>
Mat q2(magI, Rect(<span class="dv">0</span>,cy,cx,cy)); <span class="co">// Bottom-Left</span>
Mat q3(magI, Rect(cx,cy,cx,cy)); <span class="co">// Bottom-Right</span>
<span class="co">// Exchange Top-Left and Bottom-Right (tmp holds one quadrant during the swap)</span>
Mat tmp;
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
<span class="co">// Exchange Top-Right and Bottom-Left</span>
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
// Scale the shifted spectrum to [0, 1] as well before showing it
normalize(magI, magI, <span class="dv">0</span>, <span class="dv">1</span>, CV_MINMAX);
imshow(<span class="st">"Input image"</span>, image);
imshow(<span class="st">"Spectrum magnitude before shift frequency"</span>, _magI);
imshow(<span class="st">"Spectrum magnitude after shift frequency"</span>, magI);
waitKey();
<span class="kw">return</span> <span class="dv">0</span>;
}</code></pre>
<p>本程序的作用是:将图像从空间域转换到频率域,并绘制频域图像。</p>
<ol style="list-style-type: decimal">
<li><p>二维图像的DFT(离散傅里叶变换),</p>
<p><img src="https://latex.codecogs.com/png.latex? \Large F(k,l)={\sum_{i=0}^{i=N-1}}{\sum_{j=0}^{j=N-1}}f(i,j)e^{-j2\pi{(\frac{ki}{N}+\frac{lj}{N}})}"></p>
<p>图像的频域表示的是什么含义呢?又有什么用途呢?图像的频率是表征图像中灰度变化剧烈程度的指标,是灰度在平面空间上的梯度。图像的边缘部分是突变部分,变化较快,因此反应在频域上是高频分量;图像的噪声大部分情况下是高频部分;图像大部分平缓的灰度变化部分则为低频分量。也就是说,傅立叶变换提供另外一个角度来观察图像,可以将图像从灰度分布转化到频率分布上来观察图像的特征。</p>
<p>频域在图像处理中,就我所知的用途主要在两方面:图像压缩和图像去噪。关于这两点将在下面给出图片DFT的变换结果后说明。</p>
<p>有关DFT的更多性质请参考胡广书教授的《数字信号处理》教材。</p></li>
<li><p>请注意读图片的函数与之前有所不同:</p>
<pre class="sourceCode c"><code class="sourceCode c">Mat image = imread(argv[<span class="dv">1</span>], CV_LOAD_IMAGE_GRAYSCALE);</code></pre>
<p><code>CV_LOAD_IMAGE_GRAYSCALE</code>参数表示将原图像转换为灰度图后读入,这是因为后面的DFT变换都是基于二维信号的,而彩色图像是三维信号。当然,也可以对RGB每一通道都进行DFT运算。</p></li>
<li><p>快速傅里叶变换算法(FFT算法)在输入信号的长度为2^n,或者更一般地可以分解为2、3、5等小质因数的乘积时效率最高。OpenCV的<code>getOptimalDFTSize</code>返回的正是不小于原尺寸的这类最优长度(不一定是2^n)。所以程序中使用</p>
<pre class="sourceCode c"><code class="sourceCode c">copyMakeBorder(image, padded, <span class="dv">0</span>, m-image.rows, <span class="dv">0</span>, n-image.cols, BORDER_CONSTANT, Scalar::all(<span class="dv">0</span>));</code></pre>
<p>填充0使横纵长度都达到<code>getOptimalDFTSize</code>返回的最优尺寸(可分解为2、3、5的乘积,不一定恰好是2^n)。</p>
<p>对于一维信号,原DFT直接运算的复杂度是O(N^2),而快速傅里叶变换的复杂度降低到O(Nlog2(N)),假设N为512,足足提高了512/9≈57倍。</p></li>
<li><p>由DFT的性质知,输入为实信号(图像)的时候,频域输出为复数,因此将频域信息分为幅值和相位。幅值表示对应频率分量的强弱:幅值高说明图像中该频率成分多,幅值低说明该频率成分少(频率的高低由该点在频谱中的位置决定,而不是由幅值决定)。由于幅值的动态范围很大,程序中使用</p>
<pre class="sourceCode c"><code class="sourceCode c"><span class="co">// => log(1+sqrt(Re(DFT(I))^2+Im(DFT(I))^2))</span>
magI += Scalar::all(<span class="dv">1</span>);
log(magI, magI);
<span class="co">// crop the spectrum</span>
magI = magI(Rect(<span class="dv">0</span>, <span class="dv">0</span>, magI.cols & (-<span class="dv">2</span>), magI.rows & (-<span class="dv">2</span>)));
Mat _magI = magI.clone();
normalize(_magI, _magI, <span class="dv">0</span>, <span class="dv">1</span>, CV_MINMAX);</code></pre>
<p>进行log幅值计算及归一化幅值(归一化目的主要是方便将频域通过图像的形式进行显示)。</p></li>
<li><p>关于频域中心平移:DFT的直接输出中,零频(直流)及低频分量位于图像的四个角上。中心平移就是将低频分量平移到图像的中心,高频分量分布在四周,便于观测。</p>
<pre><code>int cx = magI.cols/2;
int cy = magI.rows/2;
Mat q0(magI, Rect(0,0,cx,cy)); // Top-Left
Mat q1(magI, Rect(cx,0,cx,cy)); // Top-Right
Mat q2(magI, Rect(0,cy,cx,cy)); // Bottom-Left
Mat q3(magI, Rect(cx,cy,cx,cy)); // Bottom-Right
// exchange Top-Left and Bottom-Right
Mat tmp;
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
// exchange Top-Right and Bottom-Left
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);</code></pre>
<p>其原理就是将左上角的频域和右下角的互换,右上角和左下角互换。</p>
<p>请注意:频域点和空域点的坐标没有一一对应的关系,两者的关系只是上面的DFT公式所见到的。</p></li>
<li><p>本程序因为使用到图像处理相关的函数,所以包含了头文件<code>imgproc/imgproc.hpp</code>,该文件位于opencv安装目录的include/opencv2/目录下,在编写Makefile时也要增加相关的头文件路径和库,本程序使用的Makefile如下:</p>
<pre class="sourceCode c"><code class="sourceCode c">TARG=fft2
SRC=fft2.cpp
LIB=-L/usr/local/lib/
INC=-I/usr/local/include/opencv/ -I/usr/local/include/opencv2
CFLAGS=
# Build fft2 from fft2.cpp. The source file ($^) is placed before the -l options:
# linkers that resolve symbols left to right (or default to --as-needed) can drop
# libraries named before the code that uses them, giving "undefined reference" errors.
# Recipe lines must start with a TAB character.
$(TARG):$(SRC)
	g++ -g -o $@ ${CFLAGS} $(LIB) $(INC) \
	$^ \
	-lopencv_core -lopencv_highgui -lopencv_imgproc
.PHONY:clean
clean:
-rm $(TARG) tags -f </code></pre>
<p>其中Makefile中的<code>\</code>是续行符(反斜杠后不能再有任何字符,包括空格),如上库增加了<code>-lopencv_imgproc</code>,头文件路径增加了<code>-I/usr/local/include/opencv2</code>。</p></li>
</ol>
<h2 id="效果">效果</h2>
<div class="figure">
<img src="../images/OpenCV基础篇之图像频域/dft.png" alt="dft" /><p class="caption">dft</p>
</div>
<ol style="list-style-type: decimal">
<li><p>上图从左到右分别是:原始灰度图(我大爱的杨过啊)、频域平移前的频域图像、频域中心平移后的频域图像。</p></li>
<li><p>提到图像频域变换的用途:压缩和去噪。压缩的原理就是在频域中,大部分频域的值为0(或接近0,可以进行有损压缩,如jpeg图像),只要压缩频域中的少数非0值即可达到图片压缩的目的。去噪则是通过频域的滤波实现,因为噪声大部分情况下体现为高频信号,使用低通滤波器即可滤除高频噪声(当然,也会带来损失,那就是边缘会变得模糊(之前说过,边缘也是高频信号))。</p></li>
</ol>
<div class="ds-thread" data-thread-key="OpenCV基础篇之图像频域" data-title="OpenCV基础篇之图像频域" data-url="xiahouzuoxin.github.io/notes/html/OpenCV基础篇之图像频域.html"></div>
<!-- Baidu share widget: sets the global window._bd_share_config (common, slide, image and selectShare sections), then appends a script element to the head (or body) that loads share.js from bdimg.share.baidu.com. The cdnversion query value is derived from the current time divided by 36e5 ms (one hour), so the URL changes hourly. NOTE(review): the URL is hard-coded to http; presumably blocked as mixed content if this page is served over HTTPS, please confirm. -->
<script>window._bd_share_config={"common":{"bdSnsKey":{},"bdText":"","bdMini":"2","bdMiniList":false,"bdPic":"","bdStyle":"0","bdSize":"16"},"slide":{"type":"slide","bdImg":"5","bdPos":"right","bdTop":"300"},"image":{"viewList":["qzone","tsina","tqq","renren","weixin"],"viewText":"分享到:","viewSize":"16"},"selectShare":{"bdContainerClass":null,"bdSelectMiniList":["qzone","tsina","tqq","renren","weixin"]}};with(document)0[(getElementsByTagName('head')[0]||body).appendChild(createElement('script')).src='http://bdimg.share.baidu.com/static/api/js/share.js?v=89860593.js?cdnversion='+~(-new Date()/36e5)];</script>
<!-- 多说公共JS代码 start (一个网页只需插入一次) -->
<script type="text/javascript">
// Global configuration object for the Duoshuo comment widget
// (presumably read by embed.js once it has loaded; verify against the widget docs).
var duoshuoQuery = {short_name:"xiahouzuoxin"};
// Asynchronously inject embed.js, using the same scheme (http/https) as the current page.
(function() {
  var scheme = 'http:';
  if (document.location.protocol == 'https:') {
    scheme = 'https:';
  }
  var loader = document.createElement('script');
  loader.type = 'text/javascript';
  loader.async = true;
  loader.src = scheme + '//static.duoshuo.com/embed.js';
  loader.charset = 'UTF-8';
  // Prefer the head element as the parent; fall back to body when there is no head.
  var parent = document.getElementsByTagName('head')[0];
  if (!parent) {
    parent = document.getElementsByTagName('body')[0];
  }
  parent.appendChild(loader);
})();
</script>
<!-- 多说公共JS代码 end -->
<div id="footer">
<p class="footer_subline">联系邮箱: xiahouzuoxin@163.com</p>
<p class="footer_subline">声明: 本站所有文章如非特别说明均为原创,转载请注明出处!
<!-- CNZZ site statistics: picks https or http to match the current page, then document.write()s a span with id cnzz_stat_icon_1253219218 followed by a script tag that loads s4.cnzz.com/z_stat.php?id=1253219218 with show=pic. The markup is percent-encoded in the source and decoded with unescape() before being written. -->
<script type="text/javascript">var cnzz_protocol = (("https:" == document.location.protocol) ? " https://" : " http://");document.write(unescape("%3Cspan id='cnzz_stat_icon_1253219218'%3E%3C/span%3E%3Cscript src='" + cnzz_protocol + "s4.cnzz.com/z_stat.php%3Fid%3D1253219218%26show%3Dpic' type='text/javascript'%3E%3C/script%3E"));</script>
</p>
</div>
<!-- 回到顶部 -->
<script>
// "Back to top" floating button. The button (#full) is absolutely positioned and is
// nudged toward the current scroll offset on every timer tick, so it appears to
// follow the viewport with an easing motion.

// Scroll offset the button has been moved to follow so far.
var lastScrollY = 0;
// Step applied on the most recent tick; declared explicitly but still global, as before.
var percent;
// Markup of the floating button.
var suspendcode;

/**
 * Timer callback: moves the #full element by a tenth of the distance between the
 * current vertical scroll offset and lastScrollY, rounded away from zero so the
 * button always reaches its target.
 */
function heartBeat() {
  // Default to 0 so the arithmetic below never sees undefined (which produced "NaNpx").
  var diffY = 0;
  if (document.documentElement && document.documentElement.scrollTop) {
    diffY = document.documentElement.scrollTop;
  } else if (document.body) {
    diffY = document.body.scrollTop;
  }

  percent = 0.1 * (diffY - lastScrollY);
  percent = percent > 0 ? Math.ceil(percent) : Math.floor(percent);

  var button = document.getElementById("full");
  if (!button) {
    // Button not in the DOM: nothing to move on this tick.
    return;
  }
  // Explicit radix: style.top is a decimal pixel string such as "600px".
  button.style.top = parseInt(button.style.top, 10) + percent + "px";
  lastScrollY = lastScrollY + percent;
}

suspendcode = "<div id=\"full\" style='right:1px;POSITION:absolute;TOP:600px;z-index:100'><a onclick='window.scrollTo(0,0);'><img src='../images/top.png'></a><br></div>";
document.write(suspendcode);
// Pass the function itself rather than a string, which would be evaluated like eval() on every tick.
window.setInterval(heartBeat, 1);
</script>
</body>
</html>