Skip to content

Commit

Permalink
Merge branch 'cxx98' into libsimdpp-cxx98-2.x
Browse files Browse the repository at this point in the history
  • Loading branch information
p12tic committed Apr 3, 2016
2 parents 28cb71b + e81bc0e commit db31917
Show file tree
Hide file tree
Showing 19 changed files with 171 additions and 39 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ intrinsics and even higher-level SIMD libraries. As much control as possible
is given to the developer, so that it's possible to exactly predict what code
the compiler will generate.

No API-breaking changes are planned for the foreseeable future.

Compiler and instruction set support
------------------------------------

Expand Down Expand Up @@ -54,7 +56,9 @@ version of the library is provided on the
- ICC: 2013, 2015

Clang 3.3 is not supported on ARM. MSVC and ICC are only supported on x86 and
x86-64.
x86-64. Any known compiler bugs are either worked around, or support for the
affected instruction set is disabled on the particular compiler version -- the
bugs are not exposed under any circumstances.

Newer versions of the aforementioned compilers will generally work with either
C++11 or C++98 version of the library. Older versions of these compilers will
Expand All @@ -64,7 +68,7 @@ Documentation
-------------

Online documentation is provided
[here](http://p12tic.github.io/libsimdpp/v2.0~rc1/libsimdpp/).
[here](http://p12tic.github.io/libsimdpp/v2.0~rc2/libsimdpp/).

License
-------
Expand Down
8 changes: 8 additions & 0 deletions cmake/SimdppMultiarch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ set(SIMDPP_X86_AVX_DEFINE "SIMDPP_ARCH_X86_AVX")
set(SIMDPP_X86_AVX_SUFFIX "-x86_avx")
set(SIMDPP_X86_AVX_TEST_CODE
"#include <immintrin.h>
#if (__clang_major__ == 3) && (__clang_minor__ == 6)
#error Not supported. See simdpp/detail/workarounds.h
#endif
int main()
{
union {
Expand All @@ -169,6 +173,10 @@ set(SIMDPP_X86_AVX2_DEFINE "SIMDPP_ARCH_X86_AVX2")
set(SIMDPP_X86_AVX2_SUFFIX "-x86_avx2")
set(SIMDPP_X86_AVX2_TEST_CODE
"#include <immintrin.h>
#if (__clang_major__ == 3) && (__clang_minor__ == 6)
#error Not supported. See simdpp/detail/workarounds.h
#endif
int main()
{
union {
Expand Down
11 changes: 11 additions & 0 deletions examples/dynamic_dispatch/test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@
#include <simdpp/simd.h>
#include <iostream>
#include <simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h>
#include <simdpp/dispatch/get_arch_raw_cpuid.h>
#include <simdpp/dispatch/get_arch_linux_cpuinfo.h>

// Select the function used to query the CPU's instruction set support at run
// time. Each dispatch header included above defines its SIMDPP_HAS_GET_ARCH_*
// macro only when that detection method is available for the current compiler
// and target, so the first available method in the order below is picked.
// NOTE(review): SIMDPP_USER_ARCH_INFO is presumably read by the library's
// dispatcher -- confirm against the dispatch documentation.
#if SIMDPP_HAS_GET_ARCH_RAW_CPUID
#define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_raw_cpuid()
#elif SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS
#define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_gcc_builtin_cpu_supports()
#elif SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO
#define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_linux_cpuinfo()
#else
// None of the detection methods above is available for this build.
#error "Unsupported platform"
#endif

namespace SIMDPP_ARCH_NAMESPACE {

Expand Down
21 changes: 21 additions & 0 deletions simdpp/detail/insn/i_shift_l.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,14 @@ SIMDPP_INL uint16x8 i_shift_l(const uint16x8& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Shifts every 16-bit element of a 256-bit vector left by @a count bits.
    The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint16x16 i_shift_l(const uint16x16& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsllw directly instead of going through _mm256_sll_epi16
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsllw %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_sll_epi16(a, shift);
#endif
}
#endif

Expand All @@ -108,7 +115,14 @@ SIMDPP_INL uint32x4 i_shift_l(const uint32x4& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Shifts every 32-bit element of a 256-bit vector left by @a count bits.
    The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint32x8 i_shift_l(const uint32x8& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpslld directly instead of going through _mm256_sll_epi32
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpslld %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_sll_epi32(a, shift);
#endif
}
#endif

Expand Down Expand Up @@ -136,7 +150,14 @@ SIMDPP_INL uint64x2 i_shift_l(const uint64x2& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Shifts every 64-bit element of a 256-bit vector left by @a count bits.
    The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint64x4 i_shift_l(const uint64x4& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsllq directly instead of going through _mm256_sll_epi64
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsllq %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_sll_epi64(a, shift);
#endif
}
#endif

Expand Down
35 changes: 35 additions & 0 deletions simdpp/detail/insn/i_shift_r.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,14 @@ SIMDPP_INL int16x8 i_shift_r(const int16x8& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Arithmetically shifts every signed 16-bit element of a 256-bit vector right
    by @a count bits. The shift amount is a run-time value shared by all
    elements.
*/
SIMDPP_INL int16x16 i_shift_r(const int16x16& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsraw directly instead of going through _mm256_sra_epi16
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsraw %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_sra_epi16(a, shift);
#endif
}
#endif

Expand All @@ -154,7 +161,14 @@ SIMDPP_INL uint16x8 i_shift_r(const uint16x8& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Logically shifts every unsigned 16-bit element of a 256-bit vector right by
    @a count bits. The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint16x16 i_shift_r(const uint16x16& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsrlw directly instead of going through _mm256_srl_epi16
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsrlw %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_srl_epi16(a, shift);
#endif
}
#endif

Expand All @@ -178,7 +192,14 @@ SIMDPP_INL int32x4 i_shift_r(const int32x4& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Arithmetically shifts every signed 32-bit element of a 256-bit vector right
    by @a count bits. The shift amount is a run-time value shared by all
    elements.
*/
SIMDPP_INL int32x8 i_shift_r(const int32x8& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsrad directly instead of going through _mm256_sra_epi32
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsrad %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_sra_epi32(a, shift);
#endif
}
#endif

Expand Down Expand Up @@ -209,7 +230,14 @@ SIMDPP_INL uint32x4 i_shift_r(const uint32x4& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Logically shifts every unsigned 32-bit element of a 256-bit vector right by
    @a count bits. The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint32x8 i_shift_r(const uint32x8& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsrld directly instead of going through _mm256_srl_epi32
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsrld %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_srl_epi32(a, shift);
#endif
}
#endif

Expand Down Expand Up @@ -307,7 +335,14 @@ SIMDPP_INL uint64x2 i_shift_r(const uint64x2& a, unsigned count)
#if SIMDPP_USE_AVX2
/** Logically shifts every unsigned 64-bit element of a 256-bit vector right by
    @a count bits. The shift amount is a run-time value shared by all elements.
*/
SIMDPP_INL uint64x4 i_shift_r(const uint64x4& a, unsigned count)
{
    // The shift amount is passed in an XMM register: count goes into the
    // lowest 32 bits, the remaining bits are zeroed.
    const __m128i shift = _mm_cvtsi32_si128(count);
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Emit vpsrlq directly instead of going through _mm256_srl_epi64
    // (see simdpp/detail/workarounds.h for the affected compilers).
    __m256i result = a;
    __asm("vpsrlq %1, %2, %0" : "=x"(result) : "x"(shift), "x"(result));
    return result;
#else
    return _mm256_srl_epi64(a, shift);
#endif
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion simdpp/detail/insn/load.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void i_load(V& a, const char* p)
}
}

template<class V>
template<class V> SIMDPP_INL
V i_load_any(const char* p)
{
typename detail::remove_sign<V>::type r;
Expand Down
13 changes: 13 additions & 0 deletions simdpp/detail/preprocess_single_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,42 +197,55 @@

// Translate every SIMDPP_ARCH_* macro that is currently defined into the
// matching internal SIMDPP_ARCH_PP_USE_* flag. Each public macro is undefined
// right after it has been recorded.
// NOTE(review): the #undef presumably keeps the request from leaking into
// later processing of the architecture macros -- confirm against the code
// that follows this section.

// x86 instruction sets
#ifdef SIMDPP_ARCH_X86_SSE2
#define SIMDPP_ARCH_PP_USE_SSE2 1
#undef SIMDPP_ARCH_X86_SSE2
#endif
#ifdef SIMDPP_ARCH_X86_SSE3
#define SIMDPP_ARCH_PP_USE_SSE3 1
#undef SIMDPP_ARCH_X86_SSE3
#endif
#ifdef SIMDPP_ARCH_X86_SSSE3
#define SIMDPP_ARCH_PP_USE_SSSE3 1
#undef SIMDPP_ARCH_X86_SSSE3
#endif
#ifdef SIMDPP_ARCH_X86_SSE4_1
#define SIMDPP_ARCH_PP_USE_SSE4_1 1
#undef SIMDPP_ARCH_X86_SSE4_1
#endif
#ifdef SIMDPP_ARCH_X86_AVX
#define SIMDPP_ARCH_PP_USE_AVX 1
#undef SIMDPP_ARCH_X86_AVX
#endif
#ifdef SIMDPP_ARCH_X86_AVX2
#define SIMDPP_ARCH_PP_USE_AVX2 1
#undef SIMDPP_ARCH_X86_AVX2
#endif
#ifdef SIMDPP_ARCH_X86_FMA3
#define SIMDPP_ARCH_PP_USE_FMA3 1
#undef SIMDPP_ARCH_X86_FMA3
#endif
#ifdef SIMDPP_ARCH_X86_FMA4
#define SIMDPP_ARCH_PP_USE_FMA4 1
#undef SIMDPP_ARCH_X86_FMA4
#endif
#ifdef SIMDPP_ARCH_X86_XOP
#define SIMDPP_ARCH_PP_USE_XOP 1
#undef SIMDPP_ARCH_X86_XOP
#endif
#ifdef SIMDPP_ARCH_X86_AVX512F
#define SIMDPP_ARCH_PP_USE_AVX512F 1
#undef SIMDPP_ARCH_X86_AVX512F
#endif
// ARM instruction sets
#ifdef SIMDPP_ARCH_ARM_NEON
#define SIMDPP_ARCH_PP_USE_NEON 1
#undef SIMDPP_ARCH_ARM_NEON
#endif
#ifdef SIMDPP_ARCH_ARM_NEON_FLT_SP
#define SIMDPP_ARCH_PP_USE_NEON_FLT_SP 1
#undef SIMDPP_ARCH_ARM_NEON_FLT_SP
#endif
// POWER instruction sets
#ifdef SIMDPP_ARCH_POWER_ALTIVEC
#define SIMDPP_ARCH_PP_USE_ALTIVEC 1
#undef SIMDPP_ARCH_POWER_ALTIVEC
#endif
#endif

Expand Down
25 changes: 25 additions & 0 deletions simdpp/detail/workarounds.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@

#if SIMDPP_USE_AVX512F
#if (__GNUC__ == 4) && !__INTEL_COMPILER && !__clang__
/*  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70059.
    _mm512_inserti64x4(x, y, 0) and related intrinsics result in wrong code.
    _mm512_castsi256_si512 is not available in GCC 4.9, thus there's no way
    to convert between 256-bit and 512-bit vectors.

    ICC and clang also define __GNUC__ for compatibility (clang reports
    version 4 by default), so both are excluded explicitly: this check is
    meant to reject real GCC 4.x only.
*/
#error "The first supported GCC version for AVX512F is 5.0"
#endif

Expand All @@ -45,6 +50,26 @@
#define SIMDPP_ATTRIBUTE_UNUSED
#endif

// Refuse to build AVX/AVX2 code with clang 3.6 rather than let the compiler
// generate incorrect floating-point code.
#if SIMDPP_USE_AVX || SIMDPP_USE_AVX2
#if (__clang_major__ == 3) && (__clang_minor__ == 6)
/* See https://llvm.org/bugs/show_bug.cgi?id=23441. Clang does not generate
correct floating-point code for basic 256-bit floating-point operations,
such as those resulting from _mm256_set_ps, _mm256_load_ps. Due to the
nature of affected operations, the bug is almost impossible to work around
reliably.
*/
#error AVX and AVX2 are not supported on clang 3.6 due to compiler bugs
#endif
#endif

/*  Clang 3.4 and older may crash when the following intrinsics are used with
    arguments that are known at compile time: _mm256_sll_epi{16,32,64},
    _mm256_srl_epi{16,32,64}, _mm256_sra_epi{16,32}. When this macro is set,
    the affected shift functions emit the instructions via inline assembly
    instead of using the intrinsics.

    The version macro must be spelled __clang_minor__ (with trailing
    underscores): an unknown identifier evaluates to 0 inside #if, which would
    enable the workaround on every clang 3.x release.
*/
#if (__clang_major__ == 3) && (__clang_minor__ <= 4)
#define SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS 1
#endif

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
Expand Down
16 changes: 9 additions & 7 deletions simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#ifndef LIBSIMDPP_DISPATCH_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS_H
#define LIBSIMDPP_DISPATCH_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS_H

// __builtin_cpu_supports() requires GCC 4.8 or newer on x86. The version must
// be compared as a (major, minor) pair: testing both components with >=
// independently would reject e.g. GCC 5.1, whose minor version is below 8.
#if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))) && (__i386__ || __amd64__)
#define SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS 1

#include <simdpp/dispatch/arch.h>

namespace simdpp {
Expand All @@ -18,10 +21,8 @@ namespace simdpp {
inline Arch get_arch_gcc_builtin_cpu_supports()
{
Arch arch_info;
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 8)
#if __i386__ || __amd64__
#define SIMDPP_GCC_BUILTIN_CPU_SUPPORTS_ENABLED 1
if (__builtin_cpu_supports("avx512f")) {
#if (__GNUC__ > 4)
if (__builtin_cpu_supports("avx512f")) { // since 5.0
arch_info |= Arch::X86_SSE2;
arch_info |= Arch::X86_SSE3;
arch_info |= Arch::X86_SSSE3;
Expand All @@ -30,7 +31,9 @@ inline Arch get_arch_gcc_builtin_cpu_supports()
arch_info |= Arch::X86_AVX2;
arch_info |= Arch::X86_FMA3;
arch_info |= Arch::X86_AVX512F;
} else if (__builtin_cpu_supports("avx2")) {
} else
#endif
if (__builtin_cpu_supports("avx2")) {
arch_info |= Arch::X86_SSE2;
arch_info |= Arch::X86_SSE3;
arch_info |= Arch::X86_SSSE3;
Expand Down Expand Up @@ -58,10 +61,9 @@ inline Arch get_arch_gcc_builtin_cpu_supports()
} else if (__builtin_cpu_supports("sse2")) {
arch_info |= Arch::X86_SSE2;
}
#endif
#endif
return arch_info;
}
} // namespace simdpp

#endif
#endif
8 changes: 6 additions & 2 deletions simdpp/dispatch/get_arch_linux_cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#ifndef LIBSIMDPP_DISPATCH_GET_ARCH_LINUX_CPUINFO_H
#define LIBSIMDPP_DISPATCH_GET_ARCH_LINUX_CPUINFO_H

#if __linux__ && (__arm__ || __i386__ || __amd64__)
#define SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO 1

#include <algorithm>
#include <iostream>
#include <sstream>
Expand All @@ -31,11 +34,11 @@ inline Arch get_arch_linux_cpuinfo()
std::map<std::string, Arch> features;
std::string ident;

#if defined(__arm__)
#if __arm__
ident = "Features\t";
features["neon"] = Arch::ARM_NEON | Arch::ARM_NEON_FLT_SP;

#elif defined(__i386__) || defined(__amd64__)
#elif __i386__ || __amd64__
Arch a_sse2 = Arch::X86_SSE2;
Arch a_sse3 = a_sse2 | Arch::X86_SSE3;
Arch a_ssse3 = a_sse3 | Arch::X86_SSSE3;
Expand Down Expand Up @@ -103,3 +106,4 @@ inline Arch get_arch_linux_cpuinfo()
} // namespace simdpp

#endif
#endif
8 changes: 7 additions & 1 deletion simdpp/dispatch/get_arch_raw_cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#ifndef LIBSIMDPP_DISPATCH_GET_ARCH_RAW_CPUID_H
#define LIBSIMDPP_DISPATCH_GET_ARCH_RAW_CPUID_H

// GCC-compatible compilers identify x86 targets via __i386__ / __amd64__,
// whereas MSVC defines _M_IX86 / _M_X64 instead. Both spellings are checked so
// that the _MSC_VER alternative in the compiler list can actually be satisfied.
#if (__i386__ || __amd64__ || _M_IX86 || _M_X64) && (__clang__ || __GNUC__ || __INTEL_COMPILER || _MSC_VER)
#define SIMDPP_HAS_GET_ARCH_RAW_CPUID 1

#include <simdpp/dispatch/arch.h>
#include <stdint.h>

Expand All @@ -21,7 +24,7 @@ namespace detail {
inline void get_cpuid(unsigned level, unsigned subleaf, unsigned* eax, unsigned* ebx,
unsigned* ecx, unsigned* edx)
{
#if defined(__clang__) || defined (__INTEL_COMPILER)
#if __clang__ || __INTEL_COMPILER
// Older versions of clang don't support subleafs, which leads to inability
// to detect AVX2 for example. On ICC there's no proper cpuid intrinsic.
#if __i386__
Expand Down Expand Up @@ -121,6 +124,9 @@ inline Arch get_arch_raw_cpuid()

return arch_info;
}

} // namespace simdpp

#endif // #if (__i386__ || __amd64__) && ...

#endif
Loading

0 comments on commit db31917

Please sign in to comment.