From 2ebd9d9ec72cfe9d0f698fd4528a513b19e45306 Mon Sep 17 00:00:00 2001 From: Gin <> Date: Sun, 3 Dec 2023 12:59:51 -0800 Subject: [PATCH] NEON binary image filter --- .../Kernels_BinaryImage_BasicFilters.h | 4 +- ...mage_BasicFilters_Core_64x8_arm64_NEON.cpp | 15 +- ...nels_BinaryImage_BasicFilters_arm64_NEON.h | 332 +++++++++--------- .../Kernels_PackedBinaryMatrixCore.h | 3 +- .../Kernels_PartialWordAccess_arm64_NEON.h | 12 +- SerialPrograms/Source/Tests/Kernels_Tests.cpp | 279 ++++++++++++--- SerialPrograms/Source/Tests/Kernels_Tests.h | 4 + SerialPrograms/Source/Tests/TestMap.cpp | 2 + 8 files changed, 422 insertions(+), 229 deletions(-) diff --git a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters.h b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters.h index 731068b48..69fe01004 100644 --- a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters.h +++ b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters.h @@ -81,11 +81,11 @@ void compress_rgb32_to_binary_range( // Compress (image, bytes_per_row) into a binary_image. -// For each pixel, set to 1 if distance is within the expected value. +// For each pixel, set to 1 if the Euclidean distance of the pixel color to the expected color <= max distance. 
void compress_rgb32_to_binary_euclidean( const uint32_t* image, size_t bytes_per_row, PackedBinaryMatrix_IB& matrix, - uint32_t expected, double max_euclidean_distance + uint32_t expected_color, double max_euclidean_distance ); diff --git a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp index 3554902a8..716259ebe 100644 --- a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp +++ b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp @@ -17,9 +17,9 @@ namespace Kernels{ void filter_by_mask_64x8_arm64_NEON( const PackedBinaryMatrix_IB& matrix, uint32_t* image, size_t bytes_per_row, - uint32_t replace_with, bool replace_if_zero + uint32_t replacement_color, bool replace_zero_bits ){ - FilterByMask_arm64_NEON filter(replace_with, replace_if_zero); + FilterByMask_arm64_NEON filter(replacement_color, replace_zero_bits); filter_by_mask(static_cast(matrix).get(), image, bytes_per_row, filter); } @@ -49,12 +49,11 @@ void compress_rgb32_to_binary_euclidean_64x8_arm64_NEON( PackedBinaryMatrix_IB& matrix, uint32_t expected, double max_euclidean_distance ){ - // TODO: - // Compressor_RgbEuclidean_arm64_NEON compressor(expected, max_euclidean_distance); - // compress_rgb32_to_binary( - // image, bytes_per_row, - // static_cast(matrix).get(), compressor - // ); + Compressor_RgbEuclidean_arm64_NEON compressor(expected, max_euclidean_distance); + compress_rgb32_to_binary( + image, bytes_per_row, + static_cast(matrix).get(), compressor + ); } diff --git a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h index 9dc8418a1..a01a4de2f 100644 --- 
a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h +++ b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h @@ -9,215 +9,144 @@ #include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h" +#include <iostream> +using std::cout; +using std::endl; + namespace PokemonAutomation{ namespace Kernels{ // Change color of an array of pixels based on values from a bitmap that corresponds to the pixel array. -// If `replace_if_zero` is true, change color of pixels that correspond to 0 bits. +// If `replace_zero_bits` is true, change color of pixels that correspond to 0 bits. // Otherwise, chagne color of pixels that correspond to 1 bits. class FilterByMask_arm64_NEON{ public: - FilterByMask_arm64_NEON(uint32_t replacement, bool replace_if_zero) - : m_replacement_pixel(replacement) - , m_replacement(vdupq_n_u32(replacement)) - , m_replace_if_zero_bool(replace_if_zero) - , m_replace_if_zero(vreinterpretq_u32_u8(vdupq_n_u8(replace_if_zero ? 0xff: 0))) + FilterByMask_arm64_NEON(uint32_t replacement, bool replace_zero_bits) + : m_replacement_u32(vdupq_n_u32(replacement)) + , m_replace_if_zero(replace_zero_bits) , m_zeros(vreinterpretq_u32_u8(vdupq_n_u8(0))) - , m_lasts(vdupq_n_u32(1)) {} // Given 64 bits stored in `uint64_t`, use it to set colors to 64 pixels in `pixels`. - // If filter constructor parameter `replace_if_zero` is true, the pixels corresponding to + // If filter constructor parameter `replace_zero_bits` is true, the pixels corresponding to // 0-bits are set to color `replacement` (another filter constructor parameter). // Otherwise, pixels corresponding to 1-bits are set to the color. 
PA_FORCE_INLINE void filter64(uint64_t bits, uint32_t* pixels) const{ - for(int i = 0; i < 64; i+=8){ - filter8((bits >> i) & 0xFF, pixels + i); + for(int i = 0; i < 64; i+=16){ + filter16((bits >> i) & 0xFFFF, pixels + i); } } - // partial version of filter64(bits, pixels) + // partial version of filter64(bits, pixels): instead of setting colors to 64 pixels, + // only setting `count` (count <= 64) pixels. PA_FORCE_INLINE void filter64(uint64_t bits, uint32_t* pixels, size_t count) const{ - size_t lc = count / 4; - while (lc--){ - uint32x4_t pixel = vld1q_u32(pixels); - // Change color in the four pixels according to the lowest four bits in `bits` - uint32x4_t filtered_pixel = filter4(bits & 0xF, pixel); - - vst1q_u32(pixels, filtered_pixel); - pixels += 4; - bits >>= 4; + const size_t count_round_4 = (count / 4) * 4; + for(size_t i = 0; i < count_round_4; i+=4){ + filter4((bits >> i) & 0xF, pixels + i); } - count %= 4; - if (count){ - // Load a partial of 4-pixel long data - PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t)); - uint32x4_t pixel = vreinterpretq_u32_u8(loader.load(pixels)); - // Change color in the pixels according to the lowest bits in `bits` - pixel = filter4(bits & 0xF, pixel); - switch (count){ - case 1: - pixels[0] = vgetq_lane_u32(pixel, 0); - break; - case 2: - pixels[0] = vgetq_lane_u32(pixel, 0); - pixels[1] = vgetq_lane_u32(pixel, 1); - break; - default: - pixels[0] = vgetq_lane_u32(pixel, 0); - pixels[1] = vgetq_lane_u32(pixel, 1); - pixels[2] = vgetq_lane_u32(pixel, 2); - } + + size_t left = count % 4; + if (left){ + uint32_t buffer[4]; + memcpy(buffer, pixels + count_round_4, sizeof(uint32_t) * left); + filter4((bits >> count_round_4) & 0xF, buffer); + memcpy(pixels + count_round_4, buffer, sizeof(uint32_t) * left); } } private: + // Change color in the 4 pixels according to the lowest 4 bits in `bits` + PA_FORCE_INLINE void filter4(uint64_t bits64, uint32_t* pixels) const{ + // Duplicate 4-bit pattern into four uint16_t 
places in `bits` + bits64 *= 0x0001000100010001; + // convert each uint16_t to be one bit from the lowest four bits in input `bits` + bits64 &= 0x0008000400020001; - // Change color in the 8 pixels according to the lowest 8 bits in `bits` - PA_FORCE_INLINE void filter8(uint32_t bits64, uint32_t* pixels) const{ - const uint32_t uint32_max = 0xFFFFFFFF; - if (m_replace_if_zero_bool){ - uint32_t mask0 = (bits64 & 0x1) - 1; - uint32_t mask1 = ((bits64 >> 1) & 0x1) - 1; - uint32_t mask2 = ((bits64 >> 2) & 0x1) - 1; - uint32_t mask3 = ((bits64 >> 3) & 0x1) - 1; - uint32_t mask4 = ((bits64 >> 4) & 0x1) - 1; - uint32_t mask5 = ((bits64 >> 5) & 0x1) - 1; - uint32_t mask6 = ((bits64 >> 6) & 0x1) - 1; - uint32_t mask7 = ((bits64 >> 7) & 0x1) - 1; - - pixels[0] = (mask0 & m_replacement_pixel) | (pixels[0] & (uint32_max - mask0)); - pixels[1] = (mask1 & m_replacement_pixel) | (pixels[1] & (uint32_max - mask1)); - pixels[2] = (mask2 & m_replacement_pixel) | (pixels[2] & (uint32_max - mask2)); - pixels[3] = (mask3 & m_replacement_pixel) | (pixels[3] & (uint32_max - mask3)); - pixels[4] = (mask4 & m_replacement_pixel) | (pixels[4] & (uint32_max - mask4)); - pixels[5] = (mask5 & m_replacement_pixel) | (pixels[5] & (uint32_max - mask5)); - pixels[6] = (mask6 & m_replacement_pixel) | (pixels[6] & (uint32_max - mask6)); - pixels[7] = (mask7 & m_replacement_pixel) | (pixels[7] & (uint32_max - mask7)); - } else{ - uint32_t mask0 = (bits64 & 0x1) - 1; - uint32_t mask1 = ((bits64 >> 1) & 0x1) - 1; - uint32_t mask2 = ((bits64 >> 2) & 0x1) - 1; - uint32_t mask3 = ((bits64 >> 3) & 0x1) - 1; - uint32_t mask4 = ((bits64 >> 4) & 0x1) - 1; - uint32_t mask5 = ((bits64 >> 5) & 0x1) - 1; - uint32_t mask6 = ((bits64 >> 6) & 0x1) - 1; - uint32_t mask7 = ((bits64 >> 7) & 0x1) - 1; - - pixels[0] = (mask0 & pixels[0]) | (m_replacement_pixel & (uint32_max - mask0)); - pixels[1] = (mask1 & pixels[1]) | (m_replacement_pixel & (uint32_max - mask1)); - pixels[2] = (mask2 & pixels[2]) | (m_replacement_pixel & 
(uint32_max - mask2)); - pixels[3] = (mask3 & pixels[3]) | (m_replacement_pixel & (uint32_max - mask3)); - pixels[4] = (mask4 & pixels[4]) | (m_replacement_pixel & (uint32_max - mask4)); - pixels[5] = (mask5 & pixels[5]) | (m_replacement_pixel & (uint32_max - mask5)); - pixels[6] = (mask6 & pixels[6]) | (m_replacement_pixel & (uint32_max - mask6)); - pixels[7] = (mask7 & pixels[7]) | (m_replacement_pixel & (uint32_max - mask7)); - } - } + uint32x4_t pixels_u32 = vld1q_u32(pixels); - // Change color in the 16 pixels according to the lowest 16 bits in `bits` - PA_FORCE_INLINE void filter16_NEON(uint32_t bits64, uint32_t* pixels) const{ - // const uint32_t uint32_max = 0xFFFFFFFF; - if (m_replace_if_zero_bool){ - - uint32x4x4_t pixel_32x4x4 = vld1q_u32_x4(pixels); - - uint32_t bit_rs_00 = (bits64 >> 0); - uint32_t bit_rs_01 = (bits64 >> 1); - uint32_t bit_rs_02 = (bits64 >> 2); - uint32_t bit_rs_03 = (bits64 >> 3); - uint32x4_t mask0 = vtstq_u32(uint32x4_t{bit_rs_00, bit_rs_01, bit_rs_02, bit_rs_03}, m_lasts); - uint32_t bit_rs_04 = (bits64 >> 4); - uint32_t bit_rs_05 = (bits64 >> 5); - uint32_t bit_rs_06 = (bits64 >> 6); - uint32_t bit_rs_07 = (bits64 >> 7); - uint32x4_t mask1 = vtstq_u32(uint32x4_t{bit_rs_04, bit_rs_05, bit_rs_06, bit_rs_07}, m_lasts); - uint32_t bit_rs_08 = (bits64 >> 8); - uint32_t bit_rs_09 = (bits64 >> 9); - uint32_t bit_rs_10 = (bits64 >> 10); - uint32_t bit_rs_11 = (bits64 >> 11); - uint32x4_t mask2 = vtstq_u32(uint32x4_t{bit_rs_08, bit_rs_09, bit_rs_10, bit_rs_11}, m_lasts); - uint32_t bit_rs_12 = (bits64 >> 12); - uint32_t bit_rs_13 = (bits64 >> 13); - uint32_t bit_rs_14 = (bits64 >> 14); - uint32_t bit_rs_15 = (bits64 >> 15); - uint32x4_t mask3 = vtstq_u32(uint32x4_t{bit_rs_12, bit_rs_13, bit_rs_14, bit_rs_15}, m_lasts); - - uint32x4x4_t filtered_pixel_32x4x4; - - filtered_pixel_32x4x4.val[0] = ((m_replace_if_zero - mask0) & m_replacement) + ((mask0) & pixel_32x4x4.val[0]); - filtered_pixel_32x4x4.val[1] = ((m_replace_if_zero - mask1) & 
m_replacement) + ((mask1) & pixel_32x4x4.val[1]); - filtered_pixel_32x4x4.val[2] = ((m_replace_if_zero - mask2) & m_replacement) + ((mask2) & pixel_32x4x4.val[2]); - filtered_pixel_32x4x4.val[3] = ((m_replace_if_zero - mask3) & m_replacement) + ((mask3) & pixel_32x4x4.val[3]); + // Load `bits` into simd 64-bit vector register + uint16x4_t mask_u16 = vcreate_u16(bits64); + // Expand mask to cover each pixel (uint32_t) + uint32x4_t mask_u32 = vmovl_u16(mask_u16); + // Expand mask to be all-1 or all-0 mask for each pixel + mask_u32 = vcgtq_u32(mask_u32, m_zeros); + uint32x4_t out_u32; + if (m_replace_if_zero){ + // bit select intrinsic: // vbslq_u32(a, b, c), for 1 bits in a, choose b; for 0 bits in a, choose c - // filtered_pixel_32x4x4.val[0] = vbslq_u32(vsubq_u32(mask0, m_lasts), m_replacement, pixel_32x4x4.val[0]); - // filtered_pixel_32x4x4.val[1] = vbslq_u32(vsubq_u32(mask1, m_lasts), m_replacement, pixel_32x4x4.val[1]); - // filtered_pixel_32x4x4.val[2] = vbslq_u32(vsubq_u32(mask2, m_lasts), m_replacement, pixel_32x4x4.val[2]); - // filtered_pixel_32x4x4.val[3] = vbslq_u32(vsubq_u32(mask3, m_lasts), m_replacement, pixel_32x4x4.val[3]); - - vst1q_u32_x4(pixels, filtered_pixel_32x4x4); - } else{ - // TODO: + out_u32 = vbslq_u32(mask_u32, pixels_u32, m_replacement_u32); + } else { + out_u32 = vbslq_u32(mask_u32, m_replacement_u32, pixels_u32); } + vst1q_u32(pixels, out_u32); } - // Change color in the four pixels according to the lowest four bits in `bits` - PA_FORCE_INLINE void filter4(uint32_t bits64, uint32_t* pixels) const{ - // uint32x4_t pixel = vld1q_u32(pixels); - if (m_replace_if_zero_bool){ - uint32_t check0 = (bits64 & 0x1); - pixels[0] = (1 - check0) * m_replacement_pixel + pixels[0] * check0; - uint32_t check1 = ((bits64 >> 1) & 0x1); - pixels[1] = (1 - check1) * m_replacement_pixel + pixels[1] * check1; - uint32_t check2 = ((bits64 >> 2) & 0x1); - pixels[2] = (1 - check2) * m_replacement_pixel + pixels[2] * check2; - uint32_t check3 = ((bits64 >> 3) 
& 0x1); - pixels[3] = (1 - check3) * m_replacement_pixel + pixels[3] * check3; - } - else{ - if ((bits64 & 0x1) != 0){ - pixels[0] = m_replacement_pixel; - } - if ((bits64 & 0x2) != 0){ - pixels[1] = m_replacement_pixel; - } - if ((bits64 & 0x4) != 0){ - pixels[2] = m_replacement_pixel; - } - if ((bits64 & 0x8) != 0){ - pixels[3] = m_replacement_pixel; - } - } - } + PA_FORCE_INLINE void filter16(uint64_t bits64, uint32_t* pixels) const{ + uint64_t bits_0 = bits64 & 0xF; + uint64_t bits_1 = (bits64 >> 4) & 0xF; + uint64_t bits_2 = (bits64 >> 8) & 0xF; + uint64_t bits_3 = bits64 >> 12; - // Change color in the four pixels according to the lowest four bits in `bits` - PA_FORCE_INLINE uint32x4_t filter4(uint64_t bits64, uint32x4_t pixel) const{ // Duplicate 4-bit pattern into four uint16_t places in `bits` - bits64 *= 0x0001000100010001; + bits_0 *= 0x0001000100010001; // convert each uint16_t to be one bit from the lowest four bits in input `bits` - bits64 &= 0x0008000400020001; + bits_0 &= 0x0008000400020001; + + bits_1 *= 0x0001000100010001; + bits_1 &= 0x0008000400020001; + + bits_2 *= 0x0001000100010001; + bits_2 &= 0x0008000400020001; + + bits_3 *= 0x0001000100010001; + bits_3 &= 0x0008000400020001; + + uint32x4_t pixels_0_u32 = vld1q_u32(pixels); + uint32x4_t pixels_1_u32 = vld1q_u32(pixels + 4); + uint32x4_t pixels_2_u32 = vld1q_u32(pixels + 8); + uint32x4_t pixels_3_u32 = vld1q_u32(pixels + 12); // Load `bits` into simd 64-bit vector register - uint16x4_t mask_u16x4 = vcreate_u16(bits64); + uint16x4_t mask_0_u16x4 = vcreate_u16(bits_0); + uint16x4_t mask_1_u16x4 = vcreate_u16(bits_1); + uint16x4_t mask_2_u16x4 = vcreate_u16(bits_2); + uint16x4_t mask_3_u16x4 = vcreate_u16(bits_3); // Expand mask to cover each pixel (uint32_t) - uint32x4_t mask = vmovl_u16(mask_u16x4); + uint32x4_t mask_0_u32 = vmovl_u16(mask_0_u16x4); + uint32x4_t mask_1_u32 = vmovl_u16(mask_1_u16x4); + uint32x4_t mask_2_u32 = vmovl_u16(mask_2_u16x4); + uint32x4_t mask_3_u32 = 
vmovl_u16(mask_3_u16x4); // Expand mask to be all-1 or all-0 mask for each pixel - mask = vcgtq_u32(mask, m_zeros); - // If m_replace_if_zero is 0xFF...FF, invert the mask - mask = veorq_u32(mask, m_replace_if_zero); - // bit select intrinsic: - // vbslq_u32(a, b, c), for 1 bits in a, choose b; for 0 bits in a, choose c - return vbslq_u32(mask, m_replacement, pixel); + mask_0_u32 = vcgtq_u32(mask_0_u32, m_zeros); + mask_1_u32 = vcgtq_u32(mask_1_u32, m_zeros); + mask_2_u32 = vcgtq_u32(mask_2_u32, m_zeros); + mask_3_u32 = vcgtq_u32(mask_3_u32, m_zeros); + + uint32x4_t out_0_u32, out_1_u32, out_2_u32, out_3_u32; + if (m_replace_if_zero){ + // bit select intrinsic: + // vbslq_u32(a, b, c), for 1 bits in a, choose b; for 0 bits in a, choose c + out_0_u32 = vbslq_u32(mask_0_u32, pixels_0_u32, m_replacement_u32); + out_1_u32 = vbslq_u32(mask_1_u32, pixels_1_u32, m_replacement_u32); + out_2_u32 = vbslq_u32(mask_2_u32, pixels_2_u32, m_replacement_u32); + out_3_u32 = vbslq_u32(mask_3_u32, pixels_3_u32, m_replacement_u32); + } else { + out_0_u32 = vbslq_u32(mask_0_u32, m_replacement_u32, pixels_0_u32); + out_1_u32 = vbslq_u32(mask_1_u32, m_replacement_u32, pixels_1_u32); + out_2_u32 = vbslq_u32(mask_2_u32, m_replacement_u32, pixels_2_u32); + out_3_u32 = vbslq_u32(mask_3_u32, m_replacement_u32, pixels_3_u32); + } + vst1q_u32(pixels, out_0_u32); + vst1q_u32(pixels + 4, out_1_u32); + vst1q_u32(pixels + 8, out_2_u32); + vst1q_u32(pixels + 12, out_3_u32); } private: - const uint32_t m_replacement_pixel; - const uint32x4_t m_replacement; - const bool m_replace_if_zero_bool; - const uint32x4_t m_replace_if_zero; + const uint32x4_t m_replacement_u32; + const bool m_replace_if_zero; const uint32x4_t m_zeros; - const uint32x4_t m_lasts; }; @@ -323,6 +252,73 @@ class Compressor_RgbRange_arm64_NEON{ }; +class Compressor_RgbEuclidean_arm64_NEON{ +public: + Compressor_RgbEuclidean_arm64_NEON(uint32_t expected_color, double max_euclidean_distance) + : 
m_expected_color_rgb_u8(vreinterpretq_u8_u32(vdupq_n_u32(expected_color & 0x00ffffff))) + , m_distance_squared_u32(vdupq_n_u32((uint32_t)(max_euclidean_distance * max_euclidean_distance))) + {} + + PA_FORCE_INLINE uint64_t convert64(const uint32_t* pixels) const{ + uint64_t bits = 0; + for(size_t c = 0; c < 64; c += 4){ + bits |= convert4(pixels + c) << c; + } + return bits; + } + PA_FORCE_INLINE uint64_t convert64(const uint32_t* pixels, size_t count) const{ + uint64_t bits = 0; + size_t c = 0; + + for(size_t i = 0; i < count / 4; i++, c+=4){ + bits |= convert4(pixels + c) << c; + } + if (count){ + PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t)); + const uint8x16_t pixel = loader.load(pixels + c); + const uint64_t mask = ((uint64_t)1 << count) - 1; + bits |= (convert4(pixel) & mask) << c; + } + return bits; + } + +private: + PA_FORCE_INLINE uint64_t convert4(const uint32_t* pixel) const{ + uint32x4_t in_u32 = vld1q_u32(pixel); + return convert4(vreinterpretq_u8_u32(in_u32)); + } + PA_FORCE_INLINE uint64_t convert4(const uint32x4_t& in_u32) const{ + // subtract the expected values + uint32x4_t in_dif_u32 = vreinterpretq_u32_u8(vabdq_u8(vreinterpretq_u8_u32(in_u32), m_expected_color_rgb_u8)); + + // Get green channel + uint32x4_t in_g_u32 = vandq_u32(in_dif_u32, vdupq_n_u32(0x0000ff00)); + // Move green channel to the lower end of the 16-bit regions + uint16x8_t in_g_u16 = vshrq_n_u16(vreinterpretq_u16_u32(in_g_u32), 8); + // in_rb_u16 contains the red and blue channels. 
Each channel occupies a 16-bit region + uint16x8_t in_rb_u16 = vandq_u16(vreinterpretq_u16_u32(in_dif_u32), vdupq_n_u16(0x00ff)); + + // Square operation + uint16x8_t in_g2_u16 = vmulq_u16(in_g_u16, in_g_u16); + uint16x8_t in_r2b2_u16 = vmulq_u16(in_rb_u16, in_rb_u16); + + uint32x4_t in_g2_u32 = vreinterpretq_u32_u16(in_g2_u16); + // Use pairwise addition and accumulate to add r2, g2, and b2 together + uint32x4_t sum_sqr_u32 = vpadalq_u16(in_g2_u32, in_r2b2_u16); + + // cmp_u32: if each pixel is within range (sum_sqr <= max_distance_squared), its uint32_t in `cmp_u32` is all 1 bits, + // otherwise, all 0 bits + uint32x4_t cmp_u32 = vcleq_u32(sum_sqr_u32, m_distance_squared_u32); + return (cmp_u32[0] & 0x01) | (cmp_u32[1] & 0x02) | (cmp_u32[2] & 0x04) | (cmp_u32[3] & 0x08); + } + +private: + uint8x16_t m_expected_color_rgb_u8; + uint32x4_t m_distance_squared_u32; +}; + + + } } #endif diff --git a/SerialPrograms/Source/Kernels/BinaryMatrix/Kernels_PackedBinaryMatrixCore.h b/SerialPrograms/Source/Kernels/BinaryMatrix/Kernels_PackedBinaryMatrixCore.h index da32485b2..ec93953dd 100644 --- a/SerialPrograms/Source/Kernels/BinaryMatrix/Kernels_PackedBinaryMatrixCore.h +++ b/SerialPrograms/Source/Kernels/BinaryMatrix/Kernels_PackedBinaryMatrixCore.h @@ -111,8 +111,9 @@ class PackedBinaryMatrixCore{ // Word Access. How many words in a column. One word is 8 bytes (aka 64 bits). size_t word64_height() const{ return m_logical_height; } - // Get (x-th, y-th) word. One word is 16 bytes (aka 64 bits). + // Get (x-th, y-th) word. One word is 8 bytes (aka 64 bits), one row in a tile. uint64_t word64(size_t x, size_t y) const; + // Get (x-th, y-th) word. One word is 8 bytes (aka 64 bits), one row in a tile. 
uint64_t& word64(size_t x, size_t y); private: diff --git a/SerialPrograms/Source/Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h b/SerialPrograms/Source/Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h index 9c426a878..4af5d0401 100644 --- a/SerialPrograms/Source/Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h +++ b/SerialPrograms/Source/Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h @@ -33,11 +33,11 @@ class PartialWordAccess_arm64_NEON{ // If `bytes` is 3, then `m_back_mask` is from low bytes to high bytes: [0xFF, 0xFF, 0xFF, ... 0xFF, 0, 0, 0] m_back_mask = vcgtq_u8(vdupq_n_u8((uint8_t)m_shift), seq_u8x16); - // If `bytes` is 3, then `m_shift_front` is from low bytes to high bytes: [3, 4, 5, 6, 7, ..., 18] - m_shift_front = vaddq_u8(vdupq_n_u8(uint8_t(bytes)), seq_u8x16); + // If `bytes` is 3, then `m_shift_front` is from low bytes to high bytes: [13, 14, 15, 16, 17, ..., 28] + m_shift_front = vaddq_u8(vdupq_n_u8(uint8_t(m_shift)), seq_u8x16); - // IF `bytes` is 3, then `m_shift_back` is from low bytes to hight bytes: [253, 254, 255, 0, 1, 2, 3] - m_shift_back = vsubq_u8(seq_u8x16, vdupq_n_u8((uint8_t)bytes)); + // IF `bytes` is 3, then `m_shift_back` is from low bytes to high bytes: [243, 244, 245,... 0, 1, 2] + m_shift_back = vsubq_u8(seq_u8x16, vdupq_n_u8((uint8_t)m_shift)); } // load() function that does not read past end of buffer @@ -50,8 +50,8 @@ class PartialWordAccess_arm64_NEON{ // for each uint8 in the result, ret_u8[i], get the index from `m_shift_front`: m_shift_front[i] // use the value of m_shift_front[i] as an index to get a value in x: // ret_u8[i] = x[m_shift_front[i]] - // since `m_shift_front` stores [`bytes`, `bytes+1`, `bytes+2`, ...] - // the resulting operation is to shift the bytes in x to the lower bytes by `bytes` bytes. + // since `m_shift_front` stores [`16-bytes`, `16-bytes+1`, `16-bytes+2`, ...] 
+ // the resulting operation is to shift the bytes in x to the lower bytes by `16-bytes` bytes. // For the index values >= 16 in m_shift_front[i], `vqtbl1q_u8()` returns 0. return vqtbl1q_u8(x, m_shift_front); } diff --git a/SerialPrograms/Source/Tests/Kernels_Tests.cpp b/SerialPrograms/Source/Tests/Kernels_Tests.cpp index 335db779d..ac8d8d8ea 100644 --- a/SerialPrograms/Source/Tests/Kernels_Tests.cpp +++ b/SerialPrograms/Source/Tests/Kernels_Tests.cpp @@ -16,6 +16,7 @@ #include "Kernels/BinaryMatrix/Kernels_BinaryMatrix.h" #ifdef PA_AutoDispatch_arm64_20_M1 #include "Kernels/BinaryMatrix/Kernels_BinaryMatrixTile_64x8_arm64_NEON.h" + #include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h" #endif #include "Kernels/BinaryMatrix/Kernels_BinaryMatrixTile_64xH_Default.h" #include "Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters.h" @@ -71,19 +72,16 @@ int test_kernels_BinaryMatrix(const ImageViewRGB32& image){ return 1; } - const size_t width = image.width(); - const size_t height = image.height(); + const size_t width = image.width(), height = image.height(); - const Color min_color(0, 0, 0); - const Color max_color(63, 63, 63); - const uint32_t mins = uint32_t(min_color); - const uint32_t maxs = uint32_t(max_color); + const Color min_color(0, 0, 0), max_color(63, 63, 63); + const uint32_t mins = uint32_t(min_color), maxs = uint32_t(max_color); - auto matrix_default = make_PackedBinaryMatrix(get_BinaryMatrixType(), width, height); + auto binary_matrix = make_PackedBinaryMatrix(get_BinaryMatrixType(), width, height); auto time_start = current_time(); compress_rgb32_to_binary_range( - image.data(), image.bytes_per_row(), *matrix_default, mins, maxs + image.data(), image.bytes_per_row(), *binary_matrix, mins, maxs ); auto time_end = current_time(); auto ns = std::chrono::duration_cast(time_end - time_start).count(); @@ -91,19 +89,19 @@ int test_kernels_BinaryMatrix(const ImageViewRGB32& image){ cout << "One binary matrix creation. 
time: " << ms << " ms" << endl; size_t error_count = 0; - for (size_t r = 0; r < height; r++){ - for (size_t c = 0; c < width; c++){ - const Color color(image.pixel(c, r)); + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); bool in_range = (min_color.alpha() <= color.alpha() && color.alpha() <= max_color.alpha()); in_range = in_range && (min_color.red() <= color.red() && color.red() <= max_color.red()); in_range = in_range && (min_color.green() <= color.green() && color.green() <= max_color.green()); in_range = in_range && (min_color.blue() <= color.blue() && color.blue() <= max_color.blue()); - const bool v_default = matrix_default->get(c, r); + const bool v_default = binary_matrix->get(x, y); if (error_count < 10){ if (v_default != in_range){ - cout << "Error: matrix (" << c << ", " << r << ") got " + cout << "Error: matrix (" << x << ", " << y << ") got " << v_default << " but GT is " << in_range << endl; ++error_count; } @@ -120,21 +118,20 @@ int test_kernels_BinaryMatrix(const ImageViewRGB32& image){ time_start = current_time(); for(size_t i = 0; i < num_iters; i++){ compress_rgb32_to_binary_range( - image.data(), image.bytes_per_row(), *matrix_default, mins, maxs + image.data(), image.bytes_per_row(), *binary_matrix, mins, maxs ); } time_end = current_time(); ms = std::chrono::duration_cast(time_end - time_start).count(); cout << "Running " << num_iters << " iters, average creation impl. 
time: " << ms / (double)num_iters << " ms" << endl; - // cout << matrix_default->dump() << flush; + // cout << binary_matrix->dump() << flush; return 0; } int test_kernels_FilterRGB32Range(const ImageViewRGB32& image){ - const size_t width = image.width(); - const size_t height = image.height(); + const size_t width = image.width(), height = image.height(); cout << "Testing filter_rgb32_range(), image size " << width << " x " << height << endl; Color min_color(0, 0, 0); @@ -169,11 +166,11 @@ int test_kernels_FilterRGB32Range(const ImageViewRGB32& image){ size_t actual_num_pixels_in_range = 0; size_t error_count = 0; - for (size_t r = 0; r < height; r++){ - for (size_t c = 0; c < width; c++){ - const Color color(image.pixel(c, r)); - const Color new_color(image_out.pixel(c, r)); - const Color new_color_2(image_out_2.pixel(c, r)); + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); + const Color new_color(image_out.pixel(x, y)); + const Color new_color_2(image_out_2.pixel(x, y)); bool in_range = (min_color.alpha() <= color.alpha() && color.alpha() <= max_color.alpha()); in_range = in_range && (min_color.red() <= color.red() && color.red() <= max_color.red()); in_range = in_range && (min_color.green() <= color.green() && color.green() <= max_color.green()); @@ -183,23 +180,23 @@ int test_kernels_FilterRGB32Range(const ImageViewRGB32& image){ // Print first 10 errors: if (in_range && new_color != COLOR_WHITE){ cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should be in range but not found by the function" << endl; + << x << ", " << y << ", should be in range but not found by the function" << endl; ++error_count; } else if (in_range == false && new_color != color){ cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should not be changed by the function" << endl; + << x << ", " << y 
<< ", should not be changed by the function" << endl; ++error_count; } if (in_range && new_color_2 != color){ cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should not be changed by the function" << endl; + << x << ", " << y << ", should not be changed by the function" << endl; ++error_count; } else if (in_range == false && new_color_2 != COLOR_WHITE){ cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should not be in range but not found by the function" << endl; + << x << ", " << y << ", should not be in range but not found by the function" << endl; ++error_count; } } @@ -269,11 +266,11 @@ int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image){ size_t actual_num_pixels_in_range = 0; size_t error_count = 0; - for (size_t r = 0; r < height; r++){ - for (size_t c = 0; c < width; c++){ - const Color color(image.pixel(c, r)); - const Color new_color(image_out.pixel(c, r)); - const Color new_color_2(image_out_2.pixel(c, r)); + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); + const Color new_color(image_out.pixel(x, y)); + const Color new_color_2(image_out_2.pixel(x, y)); int red_dif = (color.red() - middle_color.red()); int green_dif = (color.green() - middle_color.green()); int blue_dif = (color.blue() - middle_color.blue()); @@ -284,14 +281,14 @@ int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image){ // Print first 10 errors: if (in_range && new_color != COLOR_WHITE){ cout << "Error: wrong filter result: old color " << color.to_string() - << ", (x,y) = (" << c << ", " << r << ")" + << ", (x,y) = (" << x << ", " << y << ")" << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 << ", should be in range but not found by the function" << endl; ++error_count; } else if (in_range == false && new_color != color){ cout << "Error: wrong 
filter result: old color " << color.to_string() - << ", (x,y) = (" << c << ", " << r << ")" + << ", (x,y) = (" << x << ", " << y << ")" << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 << ", should not be changed by the function" << endl; ++error_count; @@ -299,14 +296,14 @@ int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image){ if (in_range && new_color_2 != color){ cout << "Error: wrong inverse filter result: old color " << color.to_string() - << ", (x,y) = (" << c << ", " << r << ")" + << ", (x,y) = (" << x << ", " << y << ")" << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 << ", should not be changed by the function" << endl; ++error_count; } else if (in_range == false && new_color_2 != COLOR_WHITE){ cout << "Error: wrong inverse filter result: old color " << color.to_string() - << ", (x,y) = (" << c << ", " << r << ")" + << ", (x,y) = (" << x << ", " << y << ")" << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 << ", should not be in range but not found by the function" << endl; ++error_count; @@ -378,11 +375,11 @@ int test_kernels_ToBlackWhiteRGB32Range(const ImageViewRGB32& image){ size_t actual_num_pixels_in_range = 0; size_t error_count = 0; - for (size_t r = 0; r < height; r++){ - for (size_t c = 0; c < width; c++){ - const Color color(image.pixel(c, r)); - const Color new_color(image_out.pixel(c, r)); - const Color new_color_2(image_out_2.pixel(c, r)); + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); + const Color new_color(image_out.pixel(x, y)); + const Color new_color_2(image_out_2.pixel(x, y)); bool in_range = (min_color.alpha() <= color.alpha() && color.alpha() <= max_color.alpha()); in_range = in_range && (min_color.red() <= color.red() && color.red() <= max_color.red()); in_range = in_range && (min_color.green() <= color.green() && color.green() <= max_color.green()); @@ -392,23 +389,23 @@ int test_kernels_ToBlackWhiteRGB32Range(const 
ImageViewRGB32& image){ // Print first 10 errors: if (in_range && new_color != COLOR_BLACK){ cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should be black due to in range but not so" << endl; + << x << ", " << y << ", should be black due to in range but not so" << endl; ++error_count; } else if (in_range == false && new_color != COLOR_WHITE){ cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should be white due to out of range but not so" << endl; + << x << ", " << y << ", should be white due to out of range but not so" << endl; ++error_count; } if (in_range && new_color_2 != COLOR_WHITE){ cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should be white due to in range but not so" << endl; + << x << ", " << y << ", should be white due to in range but not so" << endl; ++error_count; } else if (in_range == false && new_color_2 != COLOR_BLACK){ cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = " - << c << ", " << r << ", should be black due to out of range but not so" << endl; + << x << ", " << y << ", should be black due to out of range but not so" << endl; ++error_count; } } @@ -440,6 +437,159 @@ int test_kernels_ToBlackWhiteRGB32Range(const ImageViewRGB32& image){ return 0; } +int test_kernels_FilterByMask(const ImageViewRGB32& image){ + const size_t width = image.width(), height = image.height(); + cout << "Image width " << width << " height " << height << endl; + + const Color min_color(0, 0, 0), max_color(63, 63, 63); + const uint32_t mins = uint32_t(min_color), maxs = uint32_t(max_color); + + auto binary_matrix = make_PackedBinaryMatrix(get_BinaryMatrixType(), width, height); + compress_rgb32_to_binary_range( + image.data(), image.bytes_per_row(), *binary_matrix, mins, maxs + ); + ImageRGB32 new_image = image.copy(); + 
ImageRGB32 new_image_2 = image.copy(); + + Color replacement_color = COLOR_WHITE; + bool replace_zero_bits = true; + + auto time_start = current_time(); + filter_by_mask(*binary_matrix, new_image.data(), new_image.bytes_per_row(), uint32_t(replacement_color), replace_zero_bits); + auto time_end = current_time(); + auto ns = std::chrono::duration_cast(time_end - time_start).count(); + auto ms = ns / 1000000.; + cout << "One Filter by mask call. time: " << ms << " ms" << endl; + + filter_by_mask(*binary_matrix, new_image_2.data(), new_image_2.bytes_per_row(), uint32_t(replacement_color), !replace_zero_bits); + + size_t error_count = 0; + + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); + const Color new_color(new_image.pixel(x, y)); + const Color new_color_2(new_image_2.pixel(x, y)); + bool in_range = (min_color.alpha() <= color.alpha() && color.alpha() <= max_color.alpha()); + in_range = in_range && (min_color.red() <= color.red() && color.red() <= max_color.red()); + in_range = in_range && (min_color.green() <= color.green() && color.green() <= max_color.green()); + in_range = in_range && (min_color.blue() <= color.blue() && color.blue() <= max_color.blue()); + + if (error_count <= 10){ + if (in_range && new_color != color){ + cout << "Error: wrong filter(replace_zero_bits) result: old color " << color.to_string() << ", (x,y) = " + << x << ", " << y << ", should not be changed due to being one bit but not so" << endl; + ++error_count; + } + else if (!in_range && new_color != replacement_color){ + cout << "Error: wrong filter(replace_zero_bits) result: old color " << color.to_string() << ", (x,y) = " + << x << ", " << y << ", should be changed due to being zero bit but not so" << endl; + ++error_count; + } + + if (in_range && new_color_2 != replacement_color){ + cout << "Error: wrong filter(replace_one_bits) result: old color " << color.to_string() << ", (x,y) = " + << x << ", " << y << ", should be 
changed due to being one bit but not so" << endl; + ++error_count; + } + else if (!in_range && new_color_2 != color){ + cout << "Error: wrong filter(replace_one_bits) result: old color " << color.to_string() << ", (x,y) = " + << x << ", " << y << ", should not be changed due to being zero bit but not so" << endl; + ++error_count; + } + } + } + } + + if (error_count){ + return 1; + } + + // We try to wait for three seconds: + const size_t num_iters = size_t(3000 / ms); + time_start = current_time(); + for(size_t i = 0; i < num_iters; i++){ + filter_by_mask(*binary_matrix, new_image.data(), new_image.bytes_per_row(), uint32_t(replacement_color), replace_zero_bits); + } + time_end = current_time(); + ms = (double)std::chrono::duration_cast(time_end - time_start).count(); + cout << "Running " << num_iters << " iters, avg filter time: " << ms / num_iters << " ms" << endl; + + return 0; +} + +int test_kernels_CompressRGB32ToBinaryEuclidean(const ImageViewRGB32& image){ + const size_t width = image.width(); + const size_t height = image.height(); + cout << "Testing test_kernels_CompressRGB32ToBinaryEuclidean(), image size " << width << " x " << height << endl; + + Color middle_color = Color(image.pixel(width/2, height/2)); + cout << "Expected color: " << middle_color.to_string() << endl; + + double max_dist = 50.0; + size_t max_dist2 = size_t(max_dist * max_dist); + + PackedBinaryMatrix matrix(image.width(), image.height()); + + auto time_start = current_time(); + Kernels::compress_rgb32_to_binary_euclidean( + image.data(), image.bytes_per_row(), matrix, + uint32_t(middle_color), max_dist + ); + auto time_end = current_time(); + auto ns = std::chrono::duration_cast(time_end - time_start).count(); + auto ms = ns / 1000000.; + cout << "One filter time: " << ms << " ms" << endl; + + size_t error_count = 0; + for (size_t y = 0; y < height; y++){ + for (size_t x = 0; x < width; x++){ + const Color color(image.pixel(x, y)); + int red_dif = (color.red() - middle_color.red()); + 
int green_dif = (color.green() - middle_color.green()); + int blue_dif = (color.blue() - middle_color.blue()); + size_t color_dist2 = red_dif * red_dif + green_dif * green_dif + blue_dif * blue_dif; + bool in_range = color_dist2 <= max_dist2; + if (error_count < 10){ + // Print first 10 errors: + if (in_range && matrix.get(x, y) == false){ + cout << "Error: wrong filter result: old color " << color.to_string() + << ", (x,y) = (" << x << ", " << y << ")" + << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 + << ", should be in range but not set on matrix" << endl; + ++error_count; + } + else if (in_range == false && matrix.get(x, y) == true){ + cout << "Error: wrong filter result: old color " << color.to_string() + << ", (x,y) = (" << x << ", " << y << ")" + << ", dist2 " << color_dist2 << ", max dist2 " << max_dist2 + << ", should not be in range but set on matrix" << endl; + ++error_count; + } + } + } + } + if (error_count){ + return 1; + } + + // We try to wait for three seconds: + const size_t num_iters = size_t(3000 / ms); + time_start = current_time(); + for(size_t i = 0; i < num_iters; i++){ + Kernels::compress_rgb32_to_binary_euclidean( + image.data(), image.bytes_per_row(), matrix, + uint32_t(middle_color), max_dist + ); + } + time_end = current_time(); + ms = (double)std::chrono::duration_cast(time_end - time_start).count(); + cout << "Running " << num_iters << " iters, avg filter time: " << ms / num_iters << " ms" << endl; + + return 0; +} + + int test_kernels_Waterfill(const ImageViewRGB32& image){ @@ -724,6 +874,47 @@ int test_binary_matrix_tile(){ if (test_binary_matrix_tile_t() != 0){ return 1; } + + for(size_t num_bytes = 0; num_bytes <= 16; ++num_bytes){ + PartialWordAccess_arm64_NEON partial(num_bytes); + + uint8_t buffer[48]; + for(int i = 0; i < 48; i++){ + buffer[i] = 99; + } + for(uint8_t i = 0; i < 16; i++){ + buffer[i+16] = i; + } + + // uint8x16_t x = partial.load(buffer+16); + uint8x16_t x = 
partial.load_int_no_read_past_end(buffer+16); + for(size_t i = 0; i < num_bytes; ++i){ + if (x[i] != buffer[i+16]){ + cout << "Error: PartialWordAccess_arm64_NEON(" << num_bytes << ")::load_int_no_read_past_end(), i = " << i << " is " << int(x[i]) + << ", but should be " << int(buffer[i+16]) << endl; + return 1; + } + } + x = partial.load_int_no_read_before_ptr(buffer+16); + for(size_t i = 0; i < num_bytes; ++i){ + if (x[i] != buffer[i+16]){ + cout << "Error: PartialWordAccess_arm64_NEON(" << num_bytes << ")::load_int_no_read_before_ptr(), i = " << i << " is " << int(x[i]) + << ", but should be " << int(buffer[i+16]) << endl; + return 1; + } + } + for(int i = 0; i < 48; i++){ + buffer[i] = 99; + } + partial.store_int_no_past_end(buffer+16, x); + for(size_t i = 0; i < num_bytes; ++i){ + if (x[i] != buffer[i+16]){ + cout << "Error: PartialWordAccess_arm64_NEON(" << num_bytes << ")::store_int_no_past_end(), i = " << i << " is " << int(buffer[i+16]) + << ", but should be " << int(x[i+16]) << endl; + return 1; + } + } + } #endif return 0; } diff --git a/SerialPrograms/Source/Tests/Kernels_Tests.h b/SerialPrograms/Source/Tests/Kernels_Tests.h index 6a2138dfe..d09fd9a57 100644 --- a/SerialPrograms/Source/Tests/Kernels_Tests.h +++ b/SerialPrograms/Source/Tests/Kernels_Tests.h @@ -25,6 +25,10 @@ int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image); int test_kernels_ToBlackWhiteRGB32Range(const ImageViewRGB32& image); +int test_kernels_FilterByMask(const ImageViewRGB32& image); + +int test_kernels_CompressRGB32ToBinaryEuclidean(const ImageViewRGB32& image); + int test_kernels_Waterfill(const ImageViewRGB32& image); diff --git a/SerialPrograms/Source/Tests/TestMap.cpp b/SerialPrograms/Source/Tests/TestMap.cpp index 3ecc221f9..ce0426e9d 100644 --- a/SerialPrograms/Source/Tests/TestMap.cpp +++ b/SerialPrograms/Source/Tests/TestMap.cpp @@ -224,6 +224,8 @@ const std::map TEST_MAP = { {"Kernels_FilterRGB32Range", std::bind(image_void_detector_helper, 
test_kernels_FilterRGB32Range, _1)}, {"Kernels_FilterRGB32Euclidean", std::bind(image_void_detector_helper, test_kernels_FilterRGB32Euclidean, _1)}, {"Kernels_ToBlackWhiteRGB32Range", std::bind(image_void_detector_helper, test_kernels_ToBlackWhiteRGB32Range, _1)}, + {"Kernels_FilterByMask", std::bind(image_void_detector_helper, test_kernels_FilterByMask, _1)}, + {"Kernels_CompressRGB32ToBinaryEuclidean", std::bind(image_void_detector_helper, test_kernels_CompressRGB32ToBinaryEuclidean, _1)}, {"Kernels_Waterfill", std::bind(image_void_detector_helper, test_kernels_Waterfill, _1)}, {"CommonFramework_BlackBorderDetector", std::bind(image_bool_detector_helper, test_CommonFramework_BlackBorderDetector, _1)}, {"NintendoSwitch_UpdateMenuDetector", std::bind(image_bool_detector_helper, test_NintendoSwitch_UpdateMenuDetector, _1)},