From 6ed4cf490ed30e7f12f380dbd5e36681f02b0d35 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 14 Dec 2023 09:42:31 +0100 Subject: [PATCH] Fix numpy build on Sapphire Rapids CPUs in SciPy-bundle-2023.07-gfbf-2023a --- .../SciPy-bundle-2023.07-gfbf-2023a.eb | 21 +- ...-1.25.1_fix-duplicate-avx512-symbols.patch | 322 ++++++++++++++++++ .../numpy-1.25.1_fix-test_features.patch | 17 + .../numpy-1.25.1_fix-test_half.patch | 69 ++++ ....25.1_fix-undefined-avx512-reference.patch | 20 ++ 5 files changed, 445 insertions(+), 4 deletions(-) create mode 100644 easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-duplicate-avx512-symbols.patch create mode 100644 easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_features.patch create mode 100644 easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_half.patch create mode 100644 easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-undefined-avx512-reference.patch diff --git a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2023.07-gfbf-2023a.eb b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2023.07-gfbf-2023a.eb index d634617be9f..44b5725fe7d 100644 --- a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2023.07-gfbf-2023a.eb +++ b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2023.07-gfbf-2023a.eb @@ -29,11 +29,24 @@ use_pip = True # order is important! exts_list = [ ('numpy', '1.25.1', { - 'patches': ['numpy-1.22.3_disable-broken-override-test.patch'], + 'patches': [ + 'numpy-1.22.3_disable-broken-override-test.patch', + ('numpy-1.25.1_fix-duplicate-avx512-symbols.patch', 'numpy/core/src/npysort/x86-simd-sort'), + 'numpy-1.25.1_fix-undefined-avx512-reference.patch', + 'numpy-1.25.1_fix-test_features.patch', + 'numpy-1.25.1_fix-test_half.patch', + ], 'checksums': [ {'numpy-1.25.1.tar.gz': '9a3a9f3a61480cc086117b426a8bd86869c213fc4072e606f01c4e4b66eb92bf'}, {'numpy-1.22.3_disable-broken-override-test.patch': '9c589bb073b28b25ff45eb3c63c57966aa508dd8b318d0b885b6295271e4983c'}, + {'numpy-1.25.1_fix-duplicate-avx512-symbols.patch': + '8e32087c279b7193ae3507953480601200c9eff021819f3001d78c232c5852e6'}, + {'numpy-1.25.1_fix-undefined-avx512-reference.patch': + 'c4b66da93bf36071663f122de1ae668386cc6ab0154d21fa3e14ed7ddfe2a72c'}, + {'numpy-1.25.1_fix-test_features.patch': + '1c05ee5d105fe2f824416dd6dd5c64ed0c1cd710a002b4e6dbfafff19203adc5'}, + {'numpy-1.25.1_fix-test_half.patch': '341b99ae1801feebf382c92591794eeefdf451bc34b98f20aa985ea897488951'}, ], }), ('ply', '3.11', { @@ -52,6 +65,8 @@ exts_list = [ 'checksums': ['5ab283b9857211d61b53318b7c792cf68e798e765ee17c27ade9f6c924235731'], }), ('scipy', '1.11.1', { + 'enable_slow_tests': True, + 'ignore_test_result': False, 'patches': [ 'scipy-1.11.1_disable-tests.patch', 'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch', @@ -62,8 +77,6 @@ exts_list = [ {'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch': '918c8e6fa8215d459126f267764c961bde729ea4a116c7f6287cddfdc58ffcea'}, ], - 'enable_slow_tests': True, - 'ignore_test_result': False, }), ('numexpr', '2.8.4', { 'checksums': ['d5432537418d18691b9115d615d6daa17ee8275baef3edf1afbbf8bc69806147'], @@ -82,8 +95,8 @@ exts_list = [ 'checksums': ['7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f'], }), ('deap', '1.4.0', { - 'checksums': ['ffef2921932a0edbe634fcb6d156189e7a364bf638a2af4ae5d59931a9a4c8cc'], 'modulename': 'deap.base', + 'checksums': ['ffef2921932a0edbe634fcb6d156189e7a364bf638a2af4ae5d59931a9a4c8cc'], }), ] diff --git a/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-duplicate-avx512-symbols.patch b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-duplicate-avx512-symbols.patch new file mode 100644 index 00000000000..931654128ff --- /dev/null +++ b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-duplicate-avx512-symbols.patch @@ -0,0 +1,322 @@ +The new dispatch method in numpy includes headers of the x86-simd-sort submodule C++ files +compiled multiple times (with different architecture flags). +This leads to linker errors such as +> numpy/numpy-1.26.2/build/../numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp:161: multiple definition of `void avx512_qsort<_Float16>(_Float16*, long) +See https://github.com/numpy/numpy/issues/25274 + +Mark those functions inline, see https://github.com/intel/x86-simd-sort/pull/112 + +Author: Alexander Grund (TU Dresden) + +diff --git a/src/avx512-16bit-qsort.hpp b/src/avx512-16bit-qsort.hpp +index 606f870..bf8cf7e 100644 +--- a/src/avx512-16bit-qsort.hpp ++++ b/src/avx512-16bit-qsort.hpp +@@ -350,7 +350,7 @@ struct zmm_vector { + }; + + template <> +-bool comparison_func>(const uint16_t &a, const uint16_t &b) ++inline bool comparison_func>(const uint16_t &a, const uint16_t &b) + { + uint16_t signa = a & 0x8000, signb = b & 0x8000; + uint16_t expa = a & 0x7c00, expb = b & 0x7c00; +@@ -406,7 +406,7 @@ replace_inf_with_nan(uint16_t *arr, int64_t arrsize, int64_t nan_count) + } + + template <> +-void avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_16bit_, int16_t>( +@@ -415,7 +415,7 @@ void avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_16bit_, uint16_t>( +@@ -423,7 +423,7 @@ void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) + } + } + +-void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) ++X86_SIMD_SORT_INLINE void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +@@ -434,7 +434,7 @@ void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qsort(int16_t *arr, int64_t arrsize) ++inline void avx512_qsort(int16_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_16bit_, int16_t>( +@@ -443,7 +443,7 @@ void avx512_qsort(int16_t *arr, int64_t arrsize) + } + + template <> +-void avx512_qsort(uint16_t *arr, int64_t arrsize) ++inline void avx512_qsort(uint16_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_16bit_, uint16_t>( +@@ -451,7 +451,7 @@ void avx512_qsort(uint16_t *arr, int64_t arrsize) + } + } + +-void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize) ++X86_SIMD_SORT_INLINE void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +diff --git a/src/avx512-32bit-qsort.hpp b/src/avx512-32bit-qsort.hpp +index c4061dd..9dc3e18 100644 +--- a/src/avx512-32bit-qsort.hpp ++++ b/src/avx512-32bit-qsort.hpp +@@ -715,7 +715,7 @@ replace_inf_with_nan(float *arr, int64_t arrsize, int64_t nan_count) + } + + template <> +-void avx512_qselect(int32_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(int32_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_32bit_, int32_t>( +@@ -724,7 +724,7 @@ void avx512_qselect(int32_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qselect(uint32_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(uint32_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_32bit_, uint32_t>( +@@ -733,7 +733,7 @@ void avx512_qselect(uint32_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qselect(float *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(float *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +@@ -744,7 +744,7 @@ void avx512_qselect(float *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qsort(int32_t *arr, int64_t arrsize) ++inline void avx512_qsort(int32_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_32bit_, int32_t>( +@@ -753,7 +753,7 @@ void avx512_qsort(int32_t *arr, int64_t arrsize) + } + + template <> +-void avx512_qsort(uint32_t *arr, int64_t arrsize) ++inline void avx512_qsort(uint32_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_32bit_, uint32_t>( +@@ -762,7 +762,7 @@ void avx512_qsort(uint32_t *arr, int64_t arrsize) + } + + template <> +-void avx512_qsort(float *arr, int64_t arrsize) ++inline void avx512_qsort(float *arr, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +diff --git a/src/avx512-64bit-argsort.hpp b/src/avx512-64bit-argsort.hpp +index 80c6ce4..4687860 100644 +--- a/src/avx512-64bit-argsort.hpp ++++ b/src/avx512-64bit-argsort.hpp +@@ -311,7 +311,7 @@ bool has_nan(type_t* arr, int64_t arrsize) + } + + template +-void avx512_argsort(T* arr, int64_t *arg, int64_t arrsize) ++inline void avx512_argsort(T* arr, int64_t *arg, int64_t arrsize) + { + if (arrsize > 1) { + argsort_64bit_>( +@@ -320,7 +320,7 @@ void avx512_argsort(T* arr, int64_t *arg, int64_t arrsize) + } + + template <> +-void avx512_argsort(double* arr, int64_t *arg, int64_t arrsize) ++inline void avx512_argsort(double* arr, int64_t *arg, int64_t arrsize) + { + if (arrsize > 1) { + if (has_nan>(arr, arrsize)) { +@@ -335,7 +335,7 @@ void avx512_argsort(double* arr, int64_t *arg, int64_t arrsize) + + + template <> +-void avx512_argsort(int32_t* arr, int64_t *arg, int64_t arrsize) ++inline void avx512_argsort(int32_t* arr, int64_t *arg, int64_t arrsize) + { + if (arrsize > 1) { + argsort_64bit_>( +@@ -344,7 +344,7 @@ void avx512_argsort(int32_t* arr, int64_t *arg, int64_t arrsize) + } + + template <> +-void avx512_argsort(uint32_t* arr, int64_t *arg, int64_t arrsize) ++inline void avx512_argsort(uint32_t* arr, int64_t *arg, int64_t arrsize) + { + if (arrsize > 1) { + argsort_64bit_>( +@@ -353,7 +353,7 @@ void avx512_argsort(uint32_t* arr, int64_t *arg, int64_t arrsize) + } + + template <> +-void avx512_argsort(float* arr, int64_t *arg, int64_t arrsize) ++inline void avx512_argsort(float* arr, int64_t *arg, int64_t arrsize) + { + if (arrsize > 1) { + if (has_nan>(arr, arrsize)) { +@@ -367,7 +367,7 @@ void avx512_argsort(float* arr, int64_t *arg, int64_t arrsize) + } + + template +-std::vector avx512_argsort(T* arr, int64_t arrsize) ++inline std::vector avx512_argsort(T* arr, int64_t arrsize) + { + std::vector indices(arrsize); + std::iota(indices.begin(), indices.end(), 0); +diff --git a/src/avx512-64bit-keyvaluesort.hpp b/src/avx512-64bit-keyvaluesort.hpp +index f721f5c..26153c9 100644 +--- a/src/avx512-64bit-keyvaluesort.hpp ++++ b/src/avx512-64bit-keyvaluesort.hpp +@@ -440,7 +440,7 @@ void qsort_64bit_(type1_t *keys, + } + + template <> +-void avx512_qsort_kv(int64_t *keys, uint64_t *indexes, int64_t arrsize) ++inline void avx512_qsort_kv(int64_t *keys, uint64_t *indexes, int64_t arrsize) + { + if (arrsize > 1) { + qsort_64bit_, zmm_vector>( +@@ -449,7 +449,7 @@ void avx512_qsort_kv(int64_t *keys, uint64_t *indexes, int64_t arrsize) + } + + template <> +-void avx512_qsort_kv(uint64_t *keys, ++inline void avx512_qsort_kv(uint64_t *keys, + uint64_t *indexes, + int64_t arrsize) + { +@@ -460,7 +460,7 @@ void avx512_qsort_kv(uint64_t *keys, + } + + template <> +-void avx512_qsort_kv(double *keys, uint64_t *indexes, int64_t arrsize) ++inline void avx512_qsort_kv(double *keys, uint64_t *indexes, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(keys, arrsize); +diff --git a/src/avx512-64bit-qsort.hpp b/src/avx512-64bit-qsort.hpp +index 1cbcd38..1928bb2 100644 +--- a/src/avx512-64bit-qsort.hpp ++++ b/src/avx512-64bit-qsort.hpp +@@ -784,7 +784,7 @@ static void qselect_64bit_(type_t *arr, + } + + template <> +-void avx512_qselect(int64_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(int64_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_64bit_, int64_t>( +@@ -793,7 +793,7 @@ void avx512_qselect(int64_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qselect(uint64_t *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(uint64_t *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + qselect_64bit_, uint64_t>( +@@ -802,7 +802,7 @@ void avx512_qselect(uint64_t *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qselect(double *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(double *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +@@ -813,7 +813,7 @@ void avx512_qselect(double *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qsort(int64_t *arr, int64_t arrsize) ++inline void avx512_qsort(int64_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_64bit_, int64_t>( +@@ -822,7 +822,7 @@ void avx512_qsort(int64_t *arr, int64_t arrsize) + } + + template <> +-void avx512_qsort(uint64_t *arr, int64_t arrsize) ++inline void avx512_qsort(uint64_t *arr, int64_t arrsize) + { + if (arrsize > 1) { + qsort_64bit_, uint64_t>( +@@ -831,7 +831,7 @@ void avx512_qsort(uint64_t *arr, int64_t arrsize) + } + + template <> +-void avx512_qsort(double *arr, int64_t arrsize) ++inline void avx512_qsort(double *arr, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +diff --git a/src/avx512-common-qsort.h b/src/avx512-common-qsort.h +index 959352e..9421de5 100644 +--- a/src/avx512-common-qsort.h ++++ b/src/avx512-common-qsort.h +@@ -94,11 +94,11 @@ struct ymm_vector; + // Regular quicksort routines: + template + void avx512_qsort(T *arr, int64_t arrsize); +-void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize); ++X86_SIMD_SORT_INLINE void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize); + + template + void avx512_qselect(T *arr, int64_t k, int64_t arrsize); +-void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize); ++X86_SIMD_SORT_INLINE void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize); + + template + inline void avx512_partial_qsort(T *arr, int64_t k, int64_t arrsize) +diff --git a/src/avx512fp16-16bit-qsort.hpp b/src/avx512fp16-16bit-qsort.hpp +index 8a9a49e..1206f82 100644 +--- a/src/avx512fp16-16bit-qsort.hpp ++++ b/src/avx512fp16-16bit-qsort.hpp +@@ -145,7 +145,7 @@ replace_inf_with_nan(_Float16 *arr, int64_t arrsize, int64_t nan_count) + } + + template <> +-void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) ++inline void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); +@@ -156,7 +156,7 @@ void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) + } + + template <> +-void avx512_qsort(_Float16 *arr, int64_t arrsize) ++inline void avx512_qsort(_Float16 *arr, int64_t arrsize) + { + if (arrsize > 1) { + int64_t nan_count = replace_nan_with_inf(arr, arrsize); diff --git a/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_features.patch b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_features.patch new file mode 100644 index 00000000000..e96193bd917 --- /dev/null +++ b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_features.patch @@ -0,0 +1,17 @@ +The /proc/cpuinfo flag for AVX512FP16 is spelled avx512_fp16 Add the underscore to the mapping to make the test pass +See https://github.com/numpy/numpy/pull/25372 + +Author: Alexander Grund (TU Dresden) + +diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py +index 2fad4dfd9..48ab30a4a 100644 +--- a/numpy/core/tests/test_cpu_features.py ++++ b/numpy/core/tests/test_cpu_features.py +@@ -351,6 +351,7 @@ class Test_X86_Features(AbstractTest): + SSE3="PNI", SSE41="SSE4_1", SSE42="SSE4_2", FMA3="FMA", + AVX512VNNI="AVX512_VNNI", AVX512BITALG="AVX512_BITALG", AVX512VBMI2="AVX512_VBMI2", + AVX5124FMAPS="AVX512_4FMAPS", AVX5124VNNIW="AVX512_4VNNIW", AVX512VPOPCNTDQ="AVX512_VPOPCNTDQ", ++ AVX512FP16="AVX512_FP16", + ) + def load_flags(self): + self.load_flags_cpuinfo("flags") diff --git a/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_half.patch b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_half.patch new file mode 100644 index 00000000000..9489373e30b --- /dev/null +++ b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-test_half.patch @@ -0,0 +1,69 @@ +test_half_conversions fails when there is hardware support for Float16 (e.g. AVX512) as NaNs are not kept the same. +Error looks like: + # Convert from float64 back to float16 + b = np.array(self.all_f64, dtype=float16) + bv = b.view(dtype=uint16) + fv = self.all_f16.view(dtype=uint16) +> assert_equal(self.all_f16.view(dtype=uint16), + b.view(dtype=uint16)) +... +E AssertionError: +E Arrays are not equal +E +E Mismatched elements: 1022 / 65536 (1.56%) +E Max absolute difference: 512 +E Max relative difference: 0.01587252 +E x: array([ 0, 1, 2, ..., 65533, 65534, 65535], dtype=uint16) +E y: array([ 0, 1, 2, ..., 65533, 65534, 65535], dtype=uint16) + +Deeper investigation shows a difference of exactly 512 in indices 31745-32255 & 64513-65023 + +Fix using https://github.com/numpy/numpy/commit/7a84442b1caa4904a9b8e58bd6b93045b4ad350f +from Sayed Adel + +Author: Alexander Grund (TU Dresden) + +diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py +index ca849ad52..3e72eba89 100644 +--- a/numpy/core/tests/test_half.py ++++ b/numpy/core/tests/test_half.py +@@ -21,8 +21,11 @@ def setup_method(self): + # An array of all possible float16 values + self.all_f16 = np.arange(0x10000, dtype=uint16) + self.all_f16.dtype = float16 +- self.all_f32 = np.array(self.all_f16, dtype=float32) +- self.all_f64 = np.array(self.all_f16, dtype=float64) ++ ++ # NaN value can cause an invalid FP exception if HW is been used ++ with np.errstate(invalid='ignore'): ++ self.all_f32 = np.array(self.all_f16, dtype=float32) ++ self.all_f64 = np.array(self.all_f16, dtype=float64) + + # An array of all non-NaN float16 values, in sorted order + self.nonan_f16 = np.concatenate( +@@ -44,14 +47,19 @@ def test_half_conversions(self): + # value is preserved when converting to/from other floats. + + # Convert from float32 back to float16 +- b = np.array(self.all_f32, dtype=float16) +- assert_equal(self.all_f16.view(dtype=uint16), +- b.view(dtype=uint16)) ++ with np.errstate(invalid='ignore'): ++ b = np.array(self.all_f32, dtype=float16) ++ # avoid testing NaNs due to differ bits wither Q/SNaNs ++ b_nn = b == b ++ assert_equal(self.all_f16[b_nn].view(dtype=uint16), ++ b[b_nn].view(dtype=uint16)) + + # Convert from float64 back to float16 +- b = np.array(self.all_f64, dtype=float16) +- assert_equal(self.all_f16.view(dtype=uint16), +- b.view(dtype=uint16)) ++ with np.errstate(invalid='ignore'): ++ b = np.array(self.all_f64, dtype=float16) ++ b_nn = b == b ++ assert_equal(self.all_f16[b_nn].view(dtype=uint16), ++ b[b_nn].view(dtype=uint16)) + + # Convert float16 to longdouble and back + # This doesn't necessarily preserve the extra NaN bits, diff --git a/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-undefined-avx512-reference.patch b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-undefined-avx512-reference.patch new file mode 100644 index 00000000000..a5da1028b01 --- /dev/null +++ b/easybuild/easyconfigs/s/SciPy-bundle/numpy-1.25.1_fix-undefined-avx512-reference.patch @@ -0,0 +1,20 @@ +The change to the x86-simd-sort submodule now causes an undefined reference +to _ZN12_GLOBAL__N_112avx512_qsortItEEvPT_l on `import numpy` +Reason is that `avx512_qsort` is used but defined in a header not included +by simd_qsort_16bit.dispatch.cpp when the "AVX512_SPR" version is built. +Fix by including the header. + +Author: Alexander Grund (TU Dresden) + +diff --git a/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp b/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp +index 3f5099758..a75f882ff 100644 +--- a/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp ++++ b/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp +@@ -9,6 +9,7 @@ + + #if defined(NPY_HAVE_AVX512_SPR) && !defined(_MSC_VER) + #include "x86-simd-sort/src/avx512fp16-16bit-qsort.hpp" ++ #include "x86-simd-sort/src/avx512-16bit-qsort.hpp" + #elif defined(NPY_HAVE_AVX512_ICL) && !defined(_MSC_VER) + #include "x86-simd-sort/src/avx512-16bit-qsort.hpp" + #endif