@@ -43,12 +43,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
43
43
const uint32_t startY, const uint32_t endY,
44
44
const uint32_t width, const uint32_t /* height */ ,
45
45
const std::vector<float > &mKernel ) {
46
- float kernel[mKernel .size ()];
47
-
48
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
49
- kernel[i] = mKernel [i];
50
- }
51
-
52
46
const int kernelSize = static_cast <int >(mKernel .size ());
53
47
const int halfOfKernel = kernelSize / 2 ;
54
48
const bool isEven = kernelSize % 2 == 0 ;
@@ -82,21 +76,21 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
82
76
auto i3 = ConvertTo (df, PromoteUpperTo (d32, PromoteTo (d16, LowerHalf (vx))));
83
77
auto i4 = ConvertTo (df, PromoteUpperTo (d32, PromoteTo (d16, UpperHalf (dh8, vx))));
84
78
85
- float weight1 = kernel [halfOfKernel + r];
79
+ float weight1 = mKernel [halfOfKernel + r];
86
80
acc = Add (acc, Mul (i1, Set (df, weight1)));
87
81
88
- float weight2 = kernel [halfOfKernel + r + 1 ];
82
+ float weight2 = mKernel [halfOfKernel + r + 1 ];
89
83
acc = Add (acc, Mul (i2, Set (df, weight2)));
90
84
91
- float weight3 = kernel [halfOfKernel + r + 2 ];
85
+ float weight3 = mKernel [halfOfKernel + r + 2 ];
92
86
acc = Add (acc, Mul (i3, Set (df, weight3)));
93
87
94
- float weight4 = kernel [halfOfKernel + r + 3 ];
88
+ float weight4 = mKernel [halfOfKernel + r + 3 ];
95
89
acc = Add (acc, Mul (i4, Set (df, weight4)));
96
90
}
97
91
98
92
for (; r <= maxKernel; ++r) {
99
- float weight = kernel [halfOfKernel + r];
93
+ float weight = mKernel [halfOfKernel + r];
100
94
int sourcePX = std::clamp (kx + r, sZero , maxWidth) * 4 ;
101
95
auto vx = ConvertTo (df, PromoteTo (d32, LoadU (d8x4, &localSource[sourcePX])));
102
96
acc = Add (acc, Mul (vx, Set (df, weight)));
@@ -168,12 +162,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
168
162
const uint32_t startY, const uint32_t endY,
169
163
const uint32_t width, const uint32_t /* height */ ,
170
164
const std::vector<float > &mKernel ) {
171
- float kernel[mKernel .size ()];
172
-
173
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
174
- kernel[i] = mKernel [i];
175
- }
176
-
177
165
const int kernelSize = static_cast <int >(mKernel .size ());
178
166
const int halfOfKernel = kernelSize / 2 ;
179
167
const bool isEven = kernelSize % 2 == 0 ;
@@ -193,7 +181,7 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
193
181
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + y * srcStride);
194
182
auto kx = static_cast <int >(x);
195
183
for (; r <= maxKernel; ++r) {
196
- float weight = kernel [halfOfKernel + r];
184
+ float weight = mKernel [halfOfKernel + r];
197
185
int sourcePX = std::clamp (kx + r, sZero , maxWidth) * 3 ;
198
186
accumulator1 += localSource[sourcePX] * weight;
199
187
accumulator2 += localSource[sourcePX + 1 ] * weight;
@@ -216,12 +204,6 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
216
204
const uint32_t width, const uint32_t /* height */ ,
217
205
const std::vector<float > &mKernel ) {
218
206
219
- float kernel[mKernel .size ()];
220
-
221
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
222
- kernel[i] = mKernel [i];
223
- }
224
-
225
207
const int kernelSize = static_cast <int >(mKernel .size ());
226
208
const int halfOfKernel = kernelSize / 2 ;
227
209
const bool isEven = kernelSize % 2 == 0 ;
@@ -239,7 +221,7 @@ GaussianBlurHorizontalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t sr
239
221
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + y * srcStride);
240
222
auto kx = static_cast <int >(x);
241
223
for (; r <= maxKernel; ++r) {
242
- accumulator += localSource[std::clamp (kx + r, sZero , maxWidth)] * kernel [halfOfKernel + r];
224
+ accumulator += localSource[std::clamp (kx + r, sZero , maxWidth)] * mKernel [halfOfKernel + r];
243
225
}
244
226
dst[0 ] = static_cast <T>(::roundf (accumulator));
245
227
dst += 1 ;
@@ -255,13 +237,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
255
237
const uint32_t startY, const uint32_t endY,
256
238
const uint32_t width, const uint32_t height,
257
239
const std::vector<float > &mKernel ) {
258
-
259
- float kernel[mKernel .size ()];
260
-
261
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
262
- kernel[i] = mKernel [i];
263
- }
264
-
265
240
const int kernelSize = static_cast <int >(mKernel .size ());
266
241
const int halfOfKernel = kernelSize / 2 ;
267
242
const bool isEven = kernelSize % 2 == 0 ;
@@ -281,7 +256,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
281
256
static_cast <int64_t >(0 ),
282
257
static_cast <int64_t >(maxHeight));
283
258
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + shiftX * srcStride);
284
- accumulator += localSource[kx] * kernel [halfOfKernel + r];
259
+ accumulator += localSource[kx] * mKernel [halfOfKernel + r];
285
260
}
286
261
dst[0 ] = static_cast <T>(::roundf (accumulator));
287
262
dst += 1 ;
@@ -297,13 +272,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
297
272
const uint32_t startY, const uint32_t endY,
298
273
const uint32_t width, const uint32_t height,
299
274
const std::vector<float > &mKernel ) {
300
-
301
- float kernel[mKernel .size ()];
302
-
303
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
304
- kernel[i] = mKernel [i];
305
- }
306
-
307
275
const int kernelSize = static_cast <int >(mKernel .size ());
308
276
const int halfOfKernel = kernelSize / 2 ;
309
277
const bool isEven = kernelSize % 2 == 0 ;
@@ -325,7 +293,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
325
293
static_cast <int64_t >(0 ),
326
294
static_cast <int64_t >(maxHeight));
327
295
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + shiftX * srcStride);
328
- float weight = kernel [halfOfKernel + r];
296
+ float weight = mKernel [halfOfKernel + r];
329
297
accumulator += localSource[kx] * weight;
330
298
accumulator1 += localSource[kx + 1 ] * weight;
331
299
accumulator2 += localSource[kx + 2 ] * weight;
@@ -347,13 +315,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
347
315
const uint32_t startY, const uint32_t endY,
348
316
const uint32_t width, const uint32_t height,
349
317
const std::vector<float > &mKernel ) {
350
-
351
- float kernel[mKernel .size ()];
352
-
353
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
354
- kernel[i] = mKernel [i];
355
- }
356
-
357
318
const int kernelSize = static_cast <int >(mKernel .size ());
358
319
const int halfOfKernel = kernelSize / 2 ;
359
320
const bool isEven = kernelSize % 2 == 0 ;
@@ -376,7 +337,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
376
337
static_cast <int64_t >(0 ),
377
338
static_cast <int64_t >(maxHeight));
378
339
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + shiftX * srcStride);
379
- float weight = kernel [halfOfKernel + r];
340
+ float weight = mKernel [halfOfKernel + r];
380
341
accumulator += localSource[kx] * weight;
381
342
accumulator1 += localSource[kx + 1 ] * weight;
382
343
accumulator2 += localSource[kx + 2 ] * weight;
@@ -400,12 +361,6 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
400
361
const uint32_t startY, const uint32_t endY,
401
362
const uint32_t width, const uint32_t height,
402
363
const std::vector<float > &mKernel ) {
403
- float kernel[mKernel .size ()];
404
-
405
- for (size_t i = 0 ; i < mKernel .size (); ++i) {
406
- kernel[i] = mKernel [i];
407
- }
408
-
409
364
const int kernelSize = static_cast <int >(mKernel .size ());
410
365
const int halfOfKernel = kernelSize / 2 ;
411
366
const bool isEven = kernelSize % 2 == 0 ;
@@ -430,7 +385,7 @@ GaussianBlurVerticalPass(const T *SPARKYUV_RESTRICT mSource, const uint32_t srcS
430
385
static_cast <int64_t >(0 ),
431
386
static_cast <int64_t >(maxHeight));
432
387
auto localSource = reinterpret_cast <const T *>(reinterpret_cast <const uint8_t *>(mSource ) + shiftX * srcStride);
433
- float weight = kernel [halfOfKernel + r];
388
+ float weight = mKernel [halfOfKernel + r];
434
389
uint32_t sourcePX = x * 4 ;
435
390
auto vx = ConvertTo (df, PromoteTo (d32, LoadU (d8x4, &localSource[sourcePX])));
436
391
acc = Add (acc, Mul (vx, Set (df, weight)));
0 commit comments