diff --git a/README.md b/README.md
index a3202614..bd87ae0c 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ intrinsics and even higher-level SIMD libraries. As much control as possible
 is given to the developer, so that it's possible to exactly predict what code
 the compiler will generate.
 
+No API-breaking changes are planned for the foreseeable future.
+
 Compiler and instruction set support
 ------------------------------------
 
@@ -54,7 +56,9 @@ version of the library is provided on the
    - ICC: 2013, 2015
 
 Clang 3.3 is not supported on ARM. MSVC and ICC are only supported on x86 and
-x86-64.
+x86-64. Any compiler bugs are either worked around, or support for the broken
+instruction set is disabled on the affected compiler version -- the bugs are
+not exposed under any circumstances.
 
 Newer versions of the aforementioned compilers will generally work with either
 C++11 or C++98 version of the library. Older versions of these compilers will
@@ -64,7 +68,7 @@ Documentation
 -------------
 
 Online documentation is provided
-[here](http://p12tic.github.io/libsimdpp/v2.0~rc1/libsimdpp/).
+[here](http://p12tic.github.io/libsimdpp/v2.0~rc2/libsimdpp/).
 
 License
 -------
diff --git a/cmake/SimdppMultiarch.cmake b/cmake/SimdppMultiarch.cmake
index d6614d7f..ad725643 100644
--- a/cmake/SimdppMultiarch.cmake
+++ b/cmake/SimdppMultiarch.cmake
@@ -145,6 +145,10 @@ set(SIMDPP_X86_AVX_DEFINE "SIMDPP_ARCH_X86_AVX")
 set(SIMDPP_X86_AVX_SUFFIX "-x86_avx")
 set(SIMDPP_X86_AVX_TEST_CODE
     "#include <immintrin.h>
+    #if (__clang_major__ == 3) && (__clang_minor__ == 6)
+    #error Not supported. See simdpp/detail/workarounds.h
+    #endif
+
     int main()
     {
         union {
@@ -169,6 +173,10 @@ set(SIMDPP_X86_AVX2_DEFINE "SIMDPP_ARCH_X86_AVX2")
 set(SIMDPP_X86_AVX2_SUFFIX "-x86_avx2")
 set(SIMDPP_X86_AVX2_TEST_CODE
     "#include <immintrin.h>
+    #if (__clang_major__ == 3) && (__clang_minor__ == 6)
+    #error Not supported. See simdpp/detail/workarounds.h
+    #endif
+
     int main()
     {
         union {
diff --git a/examples/dynamic_dispatch/test.cc b/examples/dynamic_dispatch/test.cc
index 009a63d7..efe60ae6 100644
--- a/examples/dynamic_dispatch/test.cc
+++ b/examples/dynamic_dispatch/test.cc
@@ -2,7 +2,18 @@
 #include <simdpp/simd.h>
 #include <iostream>
 #include <simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h>
+#include <simdpp/dispatch/get_arch_raw_cpuid.h>
+#include <simdpp/dispatch/get_arch_linux_cpuinfo.h>
+
+#if SIMDPP_HAS_GET_ARCH_RAW_CPUID
+#define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_raw_cpuid()
+#elif SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS
 #define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_gcc_builtin_cpu_supports()
+#elif SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO
+#define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_linux_cpuinfo()
+#else
+#error "Unsupported platform"
+#endif
 
 namespace SIMDPP_ARCH_NAMESPACE {
 
diff --git a/simdpp/detail/insn/i_shift_l.h b/simdpp/detail/insn/i_shift_l.h
index 22b790cc..51f08f56 100644
--- a/simdpp/detail/insn/i_shift_l.h
+++ b/simdpp/detail/insn/i_shift_l.h
@@ -84,7 +84,14 @@ SIMDPP_INL uint16x8 i_shift_l(const uint16x8& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint16x16 i_shift_l(const uint16x16& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_sll_epi16 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsllw	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_sll_epi16(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -108,7 +115,14 @@ SIMDPP_INL uint32x4 i_shift_l(const uint32x4& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint32x8 i_shift_l(const uint32x8& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_sll_epi32 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpslld	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_sll_epi32(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -136,7 +150,14 @@ SIMDPP_INL uint64x2 i_shift_l(const uint64x2& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint64x4 i_shift_l(const uint64x4& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_sll_epi64 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsllq	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_sll_epi64(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
diff --git a/simdpp/detail/insn/i_shift_r.h b/simdpp/detail/insn/i_shift_r.h
index 15345ba4..dc591b7a 100644
--- a/simdpp/detail/insn/i_shift_r.h
+++ b/simdpp/detail/insn/i_shift_r.h
@@ -130,7 +130,14 @@ SIMDPP_INL int16x8 i_shift_r(const int16x8& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL int16x16 i_shift_r(const int16x16& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_sra_epi16 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsraw	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_sra_epi16(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -154,7 +161,14 @@ SIMDPP_INL uint16x8 i_shift_r(const uint16x8& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint16x16 i_shift_r(const uint16x16& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_srl_epi16 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsrlw	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_srl_epi16(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -178,7 +192,14 @@ SIMDPP_INL int32x4 i_shift_r(const int32x4& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL int32x8 i_shift_r(const int32x8& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_sra_epi32 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsrad	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_sra_epi32(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -209,7 +230,14 @@ SIMDPP_INL uint32x4 i_shift_r(const uint32x4& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint32x8 i_shift_r(const uint32x8& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_srl_epi32 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsrld	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_srl_epi32(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
@@ -307,7 +335,14 @@ SIMDPP_INL uint64x2 i_shift_r(const uint64x2& a, unsigned count)
 #if SIMDPP_USE_AVX2
 SIMDPP_INL uint64x4 i_shift_r(const uint64x4& a, unsigned count)
 {
+#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS // _mm256_srl_epi64 may crash the compiler
+    __m256i r = a;
+    __m128i x = _mm_cvtsi32_si128(count);
+    __asm("vpsrlq	%1, %2, %0" : "=x"(r) : "x"(x), "x"(r)); // operands: count, source, destination
+    return r;
+#else
     return _mm256_srl_epi64(a, _mm_cvtsi32_si128(count));
+#endif
 }
 #endif
 
diff --git a/simdpp/detail/insn/load.h b/simdpp/detail/insn/load.h
index 8a3e5812..b0ef80dc 100644
--- a/simdpp/detail/insn/load.h
+++ b/simdpp/detail/insn/load.h
@@ -139,7 +139,7 @@ void i_load(V& a, const char* p)
     }
 }
 
-template<class V>
+template<class V> SIMDPP_INL
 V i_load_any(const char* p)
 {
     typename detail::remove_sign<V>::type r;
diff --git a/simdpp/detail/preprocess_single_arch.h b/simdpp/detail/preprocess_single_arch.h
index 41cb30ea..a7c124c0 100644
--- a/simdpp/detail/preprocess_single_arch.h
+++ b/simdpp/detail/preprocess_single_arch.h
@@ -197,42 +197,55 @@
 
     #ifdef SIMDPP_ARCH_X86_SSE2
         #define SIMDPP_ARCH_PP_USE_SSE2 1
+        #undef SIMDPP_ARCH_X86_SSE2
     #endif
     #ifdef SIMDPP_ARCH_X86_SSE3
         #define SIMDPP_ARCH_PP_USE_SSE3 1
+        #undef SIMDPP_ARCH_X86_SSE3
     #endif
     #ifdef SIMDPP_ARCH_X86_SSSE3
         #define SIMDPP_ARCH_PP_USE_SSSE3 1
+        #undef SIMDPP_ARCH_X86_SSSE3
     #endif
     #ifdef SIMDPP_ARCH_X86_SSE4_1
         #define SIMDPP_ARCH_PP_USE_SSE4_1 1
+        #undef SIMDPP_ARCH_X86_SSE4_1
     #endif
     #ifdef SIMDPP_ARCH_X86_AVX
         #define SIMDPP_ARCH_PP_USE_AVX 1
+        #undef SIMDPP_ARCH_X86_AVX
     #endif
     #ifdef SIMDPP_ARCH_X86_AVX2
         #define SIMDPP_ARCH_PP_USE_AVX2 1
+        #undef SIMDPP_ARCH_X86_AVX2
     #endif
     #ifdef SIMDPP_ARCH_X86_FMA3
         #define SIMDPP_ARCH_PP_USE_FMA3 1
+        #undef SIMDPP_ARCH_X86_FMA3
     #endif
     #ifdef SIMDPP_ARCH_X86_FMA4
         #define SIMDPP_ARCH_PP_USE_FMA4 1
+        #undef SIMDPP_ARCH_X86_FMA4
     #endif
     #ifdef SIMDPP_ARCH_X86_XOP
         #define SIMDPP_ARCH_PP_USE_XOP 1
+        #undef SIMDPP_ARCH_X86_XOP
     #endif
     #ifdef SIMDPP_ARCH_X86_AVX512F
         #define SIMDPP_ARCH_PP_USE_AVX512F 1
+        #undef SIMDPP_ARCH_X86_AVX512F
     #endif
     #ifdef SIMDPP_ARCH_ARM_NEON
         #define SIMDPP_ARCH_PP_USE_NEON 1
+        #undef SIMDPP_ARCH_ARM_NEON
     #endif
     #ifdef SIMDPP_ARCH_ARM_NEON_FLT_SP
         #define SIMDPP_ARCH_PP_USE_NEON_FLT_SP 1
+        #undef SIMDPP_ARCH_ARM_NEON_FLT_SP
     #endif
     #ifdef SIMDPP_ARCH_POWER_ALTIVEC
         #define SIMDPP_ARCH_PP_USE_ALTIVEC 1
+        #undef SIMDPP_ARCH_POWER_ALTIVEC
     #endif
 #endif
 
diff --git a/simdpp/detail/workarounds.h b/simdpp/detail/workarounds.h
index ac8c342d..9c89ee87 100644
--- a/simdpp/detail/workarounds.h
+++ b/simdpp/detail/workarounds.h
@@ -29,6 +29,11 @@
 
 #if SIMDPP_USE_AVX512F
 #if (__GNUC__ == 4) && !__INTEL_COMPILER
+/*  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70059.
+    _mm512_inserti64x4(x, y, 0) and related intrinsics result in wrong code.
+    _mm512_castsi256_si512 is not available in GCC 4.9, thus there's no way
+    to convert between 256-bit and 512-bit vectors.
+*/
 #error "The first supported GCC version for AVX512F is 5.0"
 #endif
 
@@ -45,6 +50,26 @@
 #define SIMDPP_ATTRIBUTE_UNUSED
 #endif
 
+#if SIMDPP_USE_AVX || SIMDPP_USE_AVX2
+#if (__clang_major__ == 3) && (__clang_minor__ == 6)
+/*  See https://llvm.org/bugs/show_bug.cgi?id=23441. Clang does not generate
+    correct floating-point code for basic 256-bit floating-point operations,
+    such as those resulting from _mm256_set_ps, _mm256_load_ps. Due to the
+    nature of affected operations, the bug is almost impossible to work around
+    reliably.
+*/
+#error AVX and AVX2 are not supported on clang 3.6 due to compiler bugs
+#endif
+#endif
+
+#if (__clang_major__ == 3) && (__clang_minor__ <= 4)
+/*  Clang 3.4 and older may crash when the following intrinsics are used with
+    arguments that are known at compile time: _mm256_sll_epi{16,32,64},
+    _mm256_srl_epi{16,32,64}, _mm256_sra_epi{16,32}. When this macro is set,
+    the AVX2 i_shift_l and i_shift_r overloads use inline asm instead. */
+#define SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS 1
+#endif
+
 namespace simdpp {
 namespace SIMDPP_ARCH_NAMESPACE {
 namespace detail {
diff --git a/simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h b/simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h
index 7d03233b..f28c19dd 100644
--- a/simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h
+++ b/simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h
@@ -8,6 +8,9 @@
 #ifndef LIBSIMDPP_DISPATCH_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS_H
 #define LIBSIMDPP_DISPATCH_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS_H
 
+#if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))) && (__i386__ || __amd64__)
+#define SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS 1 // GCC 4.8 or newer, x86 and x86-64 only
+
 #include <simdpp/dispatch/arch.h>
 
 namespace simdpp {
@@ -18,10 +21,8 @@ namespace simdpp {
 inline Arch get_arch_gcc_builtin_cpu_supports()
 {
     Arch arch_info;
-#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 8)
-#if __i386__ || __amd64__
-#define SIMDPP_GCC_BUILTIN_CPU_SUPPORTS_ENABLED 1
-    if (__builtin_cpu_supports("avx512f")) {
+#if (__GNUC__ > 4)
+    if (__builtin_cpu_supports("avx512f")) { // since 5.0
         arch_info |= Arch::X86_SSE2;
         arch_info |= Arch::X86_SSE3;
         arch_info |= Arch::X86_SSSE3;
@@ -30,7 +31,9 @@ inline Arch get_arch_gcc_builtin_cpu_supports()
         arch_info |= Arch::X86_AVX2;
         arch_info |= Arch::X86_FMA3;
         arch_info |= Arch::X86_AVX512F;
-    } else if (__builtin_cpu_supports("avx2")) {
+    } else
+#endif
+    if (__builtin_cpu_supports("avx2")) {
         arch_info |= Arch::X86_SSE2;
         arch_info |= Arch::X86_SSE3;
         arch_info |= Arch::X86_SSSE3;
@@ -58,10 +61,9 @@ inline Arch get_arch_gcc_builtin_cpu_supports()
     } else if (__builtin_cpu_supports("sse2")) {
         arch_info |= Arch::X86_SSE2;
     }
-#endif
-#endif
     return arch_info;
 }
 } // namespace simdpp
 
 #endif
+#endif
diff --git a/simdpp/dispatch/get_arch_linux_cpuinfo.h b/simdpp/dispatch/get_arch_linux_cpuinfo.h
index ed73fc45..503d9360 100644
--- a/simdpp/dispatch/get_arch_linux_cpuinfo.h
+++ b/simdpp/dispatch/get_arch_linux_cpuinfo.h
@@ -8,6 +8,9 @@
 #ifndef LIBSIMDPP_DISPATCH_GET_ARCH_LINUX_CPUINFO_H
 #define LIBSIMDPP_DISPATCH_GET_ARCH_LINUX_CPUINFO_H
 
+#if __linux__ && (__arm__ || __i386__ || __amd64__)
+#define SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO 1
+
 #include <algorithm>
 #include <iostream>
 #include <sstream>
@@ -31,11 +34,11 @@ inline Arch get_arch_linux_cpuinfo()
     std::map<std::string, Arch> features;
     std::string ident;
 
-#if defined(__arm__)
+#if __arm__
     ident = "Features\t";
     features["neon"] = Arch::ARM_NEON | Arch::ARM_NEON_FLT_SP;
 
-#elif defined(__i386__) || defined(__amd64__)
+#elif __i386__ || __amd64__
     Arch a_sse2 = Arch::X86_SSE2;
     Arch a_sse3 = a_sse2 | Arch::X86_SSE3;
     Arch a_ssse3 = a_sse3 | Arch::X86_SSSE3;
@@ -103,3 +106,4 @@ inline Arch get_arch_linux_cpuinfo()
 } // namespace simdpp
 
 #endif
+#endif
diff --git a/simdpp/dispatch/get_arch_raw_cpuid.h b/simdpp/dispatch/get_arch_raw_cpuid.h
index 67084bb2..389bd587 100644
--- a/simdpp/dispatch/get_arch_raw_cpuid.h
+++ b/simdpp/dispatch/get_arch_raw_cpuid.h
@@ -8,6 +8,9 @@
 #ifndef LIBSIMDPP_DISPATCH_GET_ARCH_RAW_CPUID_H
 #define LIBSIMDPP_DISPATCH_GET_ARCH_RAW_CPUID_H
 
+#if (__i386__ || __amd64__ || _M_IX86 || _M_X64) && (__clang__ || __GNUC__ || __INTEL_COMPILER || _MSC_VER)
+#define SIMDPP_HAS_GET_ARCH_RAW_CPUID 1 // MSVC reports x86 via _M_IX86/_M_X64, not __i386__/__amd64__
+
 #include <simdpp/dispatch/arch.h>
 #include <stdint.h>
 
@@ -21,7 +24,7 @@ namespace detail {
 inline void get_cpuid(unsigned level, unsigned subleaf, unsigned* eax, unsigned* ebx,
                       unsigned* ecx, unsigned* edx)
 {
-#if defined(__clang__) || defined (__INTEL_COMPILER)
+#if __clang__ || __INTEL_COMPILER
     // Older versions of clang don't support subleafs, which leads to inability
     // to detect AVX2 for example. On ICC there's no proper cpuid intrinsic.
 #if __i386__
@@ -121,6 +124,9 @@ inline Arch get_arch_raw_cpuid()
 
     return arch_info;
 }
+
 } // namespace simdpp
 
+#endif // #if (__i386__ || __amd64__) && ...
+
 #endif
diff --git a/simdpp/types/float32x8.h b/simdpp/types/float32x8.h
index f01692c1..611fd38a 100644
--- a/simdpp/types/float32x8.h
+++ b/simdpp/types/float32x8.h
@@ -55,8 +55,8 @@ class float32<8, void> : public any_float32<8, float32<8,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    float32<8>(const native_type& d) : d_(d) {}
-    float32<8>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL float32<8>(const native_type& d) : d_(d) {}
+    SIMDPP_INL float32<8>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/simdpp/types/float64x4.h b/simdpp/types/float64x4.h
index 49617948..330c3cd5 100644
--- a/simdpp/types/float64x4.h
+++ b/simdpp/types/float64x4.h
@@ -57,8 +57,8 @@ class float64<4, void> : public any_float64<4, float64<4,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    float64<4>(const native_type& d) : d_(d) {}
-    float64<4>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL float64<4>(const native_type& d) : d_(d) {}
+    SIMDPP_INL float64<4>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/simdpp/types/int16x16.h b/simdpp/types/int16x16.h
index 9615ecdf..35c51e3e 100644
--- a/simdpp/types/int16x16.h
+++ b/simdpp/types/int16x16.h
@@ -58,8 +58,8 @@ class int16<16, void> : public any_int16<16, int16<16,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    int16<16>(const native_type& d) : d_(d) {}
-    int16<16>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL int16<16>(const native_type& d) : d_(d) {}
+    SIMDPP_INL int16<16>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
@@ -117,8 +117,8 @@ class uint16<16, void> : public any_int16<16, uint16<16,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    uint16<16>(const native_type& d) : d_(d) {}
-    uint16<16>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL uint16<16>(const native_type& d) : d_(d) {}
+    SIMDPP_INL uint16<16>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/simdpp/types/int32x8.h b/simdpp/types/int32x8.h
index 9f6620e5..745befa4 100644
--- a/simdpp/types/int32x8.h
+++ b/simdpp/types/int32x8.h
@@ -57,8 +57,8 @@ class int32<8, void> : public any_int32<8, int32<8,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    int32<8>(const native_type& d) : d_(d) {}
-    int32<8>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL int32<8>(const native_type& d) : d_(d) {}
+    SIMDPP_INL int32<8>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
@@ -116,8 +116,8 @@ class uint32<8, void> : public any_int32<8, uint32<8,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    uint32<8>(const native_type& d) : d_(d) {}
-    uint32<8>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL uint32<8>(const native_type& d) : d_(d) {}
+    SIMDPP_INL uint32<8>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/simdpp/types/int64x4.h b/simdpp/types/int64x4.h
index 31d21e7e..44dd3237 100644
--- a/simdpp/types/int64x4.h
+++ b/simdpp/types/int64x4.h
@@ -57,8 +57,8 @@ class int64<4, void> : public any_int64<4, int64<4,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    int64<4>(const native_type& d) : d_(d) {}
-    int64<4>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL int64<4>(const native_type& d) : d_(d) {}
+    SIMDPP_INL int64<4>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
@@ -116,8 +116,8 @@ class uint64<4, void> : public any_int64<4, uint64<4,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    uint64<4>(const native_type& d) : d_(d) {}
-    uint64<4>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL uint64<4>(const native_type& d) : d_(d) {}
+    SIMDPP_INL uint64<4>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/simdpp/types/int8x32.h b/simdpp/types/int8x32.h
index 0072d6c5..41a1136b 100644
--- a/simdpp/types/int8x32.h
+++ b/simdpp/types/int8x32.h
@@ -57,8 +57,8 @@ class int8<32, void> : public any_int8<32, int8<32,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    int8<32>(const native_type& d) : d_(d) {}
-    int8<32>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL int8<32>(const native_type& d) : d_(d) {}
+    SIMDPP_INL int8<32>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
@@ -116,8 +116,8 @@ class uint8<32, void> : public any_int8<32, uint8<32,void> > {
 
     /// @{
     /// Construct from the underlying vector type
-    uint8<32>(const native_type& d) : d_(d) {}
-    uint8<32>& operator=(const native_type& d) { d_ = d; return *this; }
+    SIMDPP_INL uint8<32>(const native_type& d) : d_(d) {}
+    SIMDPP_INL uint8<32>& operator=(const native_type& d) { d_ = d; return *this; }
     /// @}
 
     /// Convert to the underlying vector type
diff --git a/test/main_dispatcher.cc b/test/main_dispatcher.cc
index 081154bc..87deea00 100644
--- a/test/main_dispatcher.cc
+++ b/test/main_dispatcher.cc
@@ -11,6 +11,13 @@
 #include <string>
 #include <cstdlib>
 
+// Check that all available getters of the supported architecture compile
+// on all compilers
+#include <simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h>
+#include <simdpp/dispatch/get_arch_linux_cpuinfo.h>
+#include <simdpp/dispatch/get_arch_raw_cpuid.h>
+#include <simdpp/dispatch/get_arch_string_list.h>
+
 static simdpp::Arch g_supported_arch;
 
 simdpp::Arch get_supported_arch()
diff --git a/test/main_test1.cc b/test/main_test1.cc
index d841db4d..0da44da8 100644
--- a/test/main_test1.cc
+++ b/test/main_test1.cc
@@ -15,23 +15,19 @@
 #include <iomanip>
 #include <vector>
 
-#if __linux__
 #include <simdpp/dispatch/get_arch_linux_cpuinfo.h>
-#endif
-#if SIMDPP_X86
 #include <simdpp/dispatch/get_arch_raw_cpuid.h>
-#endif
 #include <simdpp/dispatch/get_arch_string_list.h>
 
 simdpp::Arch get_arch_from_system(bool is_simulator)
 {
     (void) is_simulator;
     std::vector<simdpp::Arch> supported_archs;
-#if __linux__
+#if SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO
     if (!is_simulator)
         supported_archs.push_back(simdpp::get_arch_linux_cpuinfo());
 #endif
-#if SIMDPP_X86
+#if SIMDPP_HAS_GET_ARCH_RAW_CPUID
     supported_archs.push_back(simdpp::get_arch_raw_cpuid());
 #endif
     if (supported_archs.empty()) {