Skip to content
This repository was archived by the owner on Feb 10, 2025. It is now read-only.

Commit 5d36683

Browse files
committed
Added Gaussian blur and fast Gaussian blur, some refactoring, and a fix for the MSVC M_PI build
1 parent 138a4f7 commit 5d36683

File tree

1 file changed

+11
-56
lines changed

1 file changed

+11
-56
lines changed

src/GaussianBlur-inl.h

Lines changed: 11 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
4343
const uint32_t startY, const uint32_t endY,
4444
const uint32_t width, const uint32_t /* height */,
4545
const std::vector<float> &mKernel) {
46-
float kernel[mKernel.size()];
47-
48-
for (size_t i = 0; i < mKernel.size(); ++i) {
49-
kernel[i] = mKernel[i];
50-
}
51-
5246
const int kernelSize = static_cast<int>(mKernel.size());
5347
const int halfOfKernel = kernelSize / 2;
5448
const bool isEven = kernelSize % 2 == 0;
@@ -82,21 +76,21 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
8276
auto i3 = ConvertTo(df, PromoteUpperTo(d32, PromoteTo(d16, LowerHalf(vx))));
8377
auto i4 = ConvertTo(df, PromoteUpperTo(d32, PromoteTo(d16, UpperHalf(dh8, vx))));
8478

85-
float weight1 = kernel[halfOfKernel + r];
79+
float weight1 = mKernel[halfOfKernel + r];
8680
acc = Add(acc, Mul(i1, Set(df, weight1)));
8781

88-
float weight2 = kernel[halfOfKernel + r + 1];
82+
float weight2 = mKernel[halfOfKernel + r + 1];
8983
acc = Add(acc, Mul(i2, Set(df, weight2)));
9084

91-
float weight3 = kernel[halfOfKernel + r + 2];
85+
float weight3 = mKernel[halfOfKernel + r + 2];
9286
acc = Add(acc, Mul(i3, Set(df, weight3)));
9387

94-
float weight4 = kernel[halfOfKernel + r + 3];
88+
float weight4 = mKernel[halfOfKernel + r + 3];
9589
acc = Add(acc, Mul(i4, Set(df, weight4)));
9690
}
9791

9892
for (; r <= maxKernel; ++r) {
99-
float weight = kernel[halfOfKernel + r];
93+
float weight = mKernel[halfOfKernel + r];
10094
int sourcePX = std::clamp(kx + r, sZero, maxWidth) * 4;
10195
auto vx = ConvertTo(df, PromoteTo(d32, LoadU(d8x4, &localSource[sourcePX])));
10296
acc = Add(acc, Mul(vx, Set(df, weight)));
@@ -168,12 +162,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
168162
const uint32_t startY, const uint32_t endY,
169163
const uint32_t width, const uint32_t /* height */,
170164
const std::vector<float> &mKernel) {
171-
float kernel[mKernel.size()];
172-
173-
for (size_t i = 0; i < mKernel.size(); ++i) {
174-
kernel[i] = mKernel[i];
175-
}
176-
177165
const int kernelSize = static_cast<int>(mKernel.size());
178166
const int halfOfKernel = kernelSize / 2;
179167
const bool isEven = kernelSize % 2 == 0;
@@ -193,7 +181,7 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
193181
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + y * srcStride);
194182
auto kx = static_cast<int>(x);
195183
for (; r <= maxKernel; ++r) {
196-
float weight = kernel[halfOfKernel + r];
184+
float weight = mKernel[halfOfKernel + r];
197185
int sourcePX = std::clamp(kx + r, sZero, maxWidth) * 3;
198186
accumulator1 += localSource[sourcePX] * weight;
199187
accumulator2 += localSource[sourcePX + 1] * weight;
@@ -216,12 +204,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
216204
const uint32_t width, const uint32_t /* height */,
217205
const std::vector<float> &mKernel) {
218206

219-
float kernel[mKernel.size()];
220-
221-
for (size_t i = 0; i < mKernel.size(); ++i) {
222-
kernel[i] = mKernel[i];
223-
}
224-
225207
const int kernelSize = static_cast<int>(mKernel.size());
226208
const int halfOfKernel = kernelSize / 2;
227209
const bool isEven = kernelSize % 2 == 0;
@@ -239,7 +221,7 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
239221
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + y * srcStride);
240222
auto kx = static_cast<int>(x);
241223
for (; r <= maxKernel; ++r) {
242-
accumulator += localSource[std::clamp(kx + r, sZero, maxWidth)] * kernel[halfOfKernel + r];
224+
accumulator += localSource[std::clamp(kx + r, sZero, maxWidth)] * mKernel[halfOfKernel + r];
243225
}
244226
dst[0] = static_cast<T>(::roundf(accumulator));
245227
dst += 1;
@@ -255,13 +237,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
255237
const uint32_t startY, const uint32_t endY,
256238
const uint32_t width, const uint32_t height,
257239
const std::vector<float> &mKernel) {
258-
259-
float kernel[mKernel.size()];
260-
261-
for (size_t i = 0; i < mKernel.size(); ++i) {
262-
kernel[i] = mKernel[i];
263-
}
264-
265240
const int kernelSize = static_cast<int>(mKernel.size());
266241
const int halfOfKernel = kernelSize / 2;
267242
const bool isEven = kernelSize % 2 == 0;
@@ -281,7 +256,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
281256
static_cast<int64_t>(0),
282257
static_cast<int64_t>(maxHeight));
283258
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + shiftX * srcStride);
284-
accumulator += localSource[kx] * kernel[halfOfKernel + r];
259+
accumulator += localSource[kx] * mKernel[halfOfKernel + r];
285260
}
286261
dst[0] = static_cast<T>(::roundf(accumulator));
287262
dst += 1;
@@ -297,13 +272,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
297272
const uint32_t startY, const uint32_t endY,
298273
const uint32_t width, const uint32_t height,
299274
const std::vector<float> &mKernel) {
300-
301-
float kernel[mKernel.size()];
302-
303-
for (size_t i = 0; i < mKernel.size(); ++i) {
304-
kernel[i] = mKernel[i];
305-
}
306-
307275
const int kernelSize = static_cast<int>(mKernel.size());
308276
const int halfOfKernel = kernelSize / 2;
309277
const bool isEven = kernelSize % 2 == 0;
@@ -325,7 +293,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
325293
static_cast<int64_t>(0),
326294
static_cast<int64_t>(maxHeight));
327295
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + shiftX * srcStride);
328-
float weight = kernel[halfOfKernel + r];
296+
float weight = mKernel[halfOfKernel + r];
329297
accumulator += localSource[kx] * weight;
330298
accumulator1 += localSource[kx + 1] * weight;
331299
accumulator2 += localSource[kx + 2] * weight;
@@ -347,13 +315,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
347315
const uint32_t startY, const uint32_t endY,
348316
const uint32_t width, const uint32_t height,
349317
const std::vector<float> &mKernel) {
350-
351-
float kernel[mKernel.size()];
352-
353-
for (size_t i = 0; i < mKernel.size(); ++i) {
354-
kernel[i] = mKernel[i];
355-
}
356-
357318
const int kernelSize = static_cast<int>(mKernel.size());
358319
const int halfOfKernel = kernelSize / 2;
359320
const bool isEven = kernelSize % 2 == 0;
@@ -376,7 +337,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
376337
static_cast<int64_t>(0),
377338
static_cast<int64_t>(maxHeight));
378339
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + shiftX * srcStride);
379-
float weight = kernel[halfOfKernel + r];
340+
float weight = mKernel[halfOfKernel + r];
380341
accumulator += localSource[kx] * weight;
381342
accumulator1 += localSource[kx + 1] * weight;
382343
accumulator2 += localSource[kx + 2] * weight;
@@ -400,12 +361,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
400361
const uint32_t startY, const uint32_t endY,
401362
const uint32_t width, const uint32_t height,
402363
const std::vector<float> &mKernel) {
403-
float kernel[mKernel.size()];
404-
405-
for (size_t i = 0; i < mKernel.size(); ++i) {
406-
kernel[i] = mKernel[i];
407-
}
408-
409364
const int kernelSize = static_cast<int>(mKernel.size());
410365
const int halfOfKernel = kernelSize / 2;
411366
const bool isEven = kernelSize % 2 == 0;
@@ -430,7 +385,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
430385
static_cast<int64_t>(0),
431386
static_cast<int64_t>(maxHeight));
432387
auto localSource = reinterpret_cast<const T *>(reinterpret_cast<const uint8_t *>(mSource) + shiftX * srcStride);
433-
float weight = kernel[halfOfKernel + r];
388+
float weight = mKernel[halfOfKernel + r];
434389
uint32_t sourcePX = x * 4;
435390
auto vx = ConvertTo(df, PromoteTo(d32, LoadU(d8x4, &localSource[sourcePX])));
436391
acc = Add(acc, Mul(vx, Set(df, weight)));

0 commit comments

Comments
 (0)