diff --git a/src/cxon/lang/common/cio/char.hxx b/src/cxon/lang/common/cio/char.hxx index d1673ccc..bad39de9 100644 --- a/src/cxon/lang/common/cio/char.hxx +++ b/src/cxon/lang/common/cio/char.hxx @@ -451,9 +451,11 @@ namespace cxon { namespace cio { namespace chr { default : return poke(o, c, cx); } } + template + static constexpr bool use_simd_() { return CXON_USE_SIMD && !Y::produce_strict_javascript; } template static auto range(O& o, const char* f, const char* l, Cx& cx) - -> enable_if_t<( is_unquoted_key_context::value || !Y::assume_no_escapes) && (!CXON_USE_SIMD || Y::produce_strict_javascript), bool> + -> enable_if_t<( is_unquoted_key_context::value || !Y::assume_no_escapes) && !use_simd_(), bool> { char const* a = f; for ( ; f != l; ++f) { @@ -492,50 +494,54 @@ namespace cxon { namespace cio { namespace chr { } template static auto range(O& o, const char* f, const char* l, Cx& cx) - -> enable_if_t<( is_unquoted_key_context::value || !Y::assume_no_escapes) && ( CXON_USE_SIMD && !Y::produce_strict_javascript), bool> + -> enable_if_t<( is_unquoted_key_context::value || !Y::assume_no_escapes) && use_simd_(), bool> { - char const *a = f; - - __m128i const v1 = _mm_set1_epi8('"'); - __m128i const v2 = _mm_set1_epi8('\\'); - __m128i const v3 = _mm_set1_epi8(0x1F); - std::size_t n = l - f; - while (n >= 16) { - __m128i r0 = _mm_loadu_si128((__m128i const*)f); - __m128i r1 = _mm_cmpeq_epi8(r0, v1); - __m128i r2 = _mm_cmpeq_epi8(r0, v2); - __m128i r3 = _mm_cmpeq_epi8(_mm_min_epu8(r0, v3), r0); - __m128i r4 = _mm_or_si128(_mm_or_si128(r1, r2), r3); - if (int r = _mm_movemask_epi8(r4)) { - int e = ffs(r); - if (a != (f += e) && !poke(o, a, f, cx)) return false; - a = f; - for (auto* k = f + 16 - e; f != k; ++f, ++e) { - if ((1 << e) & r) { - if (a != f && !poke(o, a, f, cx)) return false; - if (!value(o, *f, cx)) return false; - a = f + 1; +# if CXON_USE_SIMD + char const *a = f; + + __m128i const v1 = _mm_set1_epi8('"'); + __m128i const v2 = _mm_set1_epi8('\\'); + __m128i const v3 = _mm_set1_epi8(0x1F); + std::size_t n = l - f; + while (n >= 16) { + __m128i r0 = _mm_loadu_si128((__m128i const*)f); + __m128i r1 = _mm_cmpeq_epi8(r0, v1); + __m128i r2 = _mm_cmpeq_epi8(r0, v2); + __m128i r3 = _mm_cmpeq_epi8(_mm_min_epu8(r0, v3), r0); + __m128i r4 = _mm_or_si128(_mm_or_si128(r1, r2), r3); + if (int r = _mm_movemask_epi8(r4)) { + int e = ffs(r); + if (a != (f += e) && !poke(o, a, f, cx)) return false; + a = f; + for (auto* k = f + 16 - e; f != k; ++f, ++e) { + if ((1 << e) & r) { + if (a != f && !poke(o, a, f, cx)) return false; + if (!value(o, *f, cx)) return false; + a = f + 1; + } } + if (a != f && !poke(o, a, f, cx)) return false; + a = f, f -= 16; } - if (a != f && !poke(o, a, f, cx)) return false; - a = f, f -= 16; + f += 16, n -= 16; + } + if (a != f) { + if (!poke(o, a, f, cx)) return false; + a = f; } - f += 16, n -= 16; - } - if (a != f) { - if (!poke(o, a, f, cx)) return false; - a = f; - } - for ( ; f != l; ++f) { - if (should_escape_(*f)) { - if (a != f && !poke(o, a, f, cx)) return false; - if (!value(o, *f, cx)) return false; - a = f + 1; + for ( ; f != l; ++f) { + if (should_escape_(*f)) { + if (a != f && !poke(o, a, f, cx)) return false; + if (!value(o, *f, cx)) return false; + a = f + 1; + } } - } - return a == f || poke(o, a, f, cx); + return a == f || poke(o, a, f, cx); +# else + return cx/X::read_error::unexpected; +# endif } template static auto range(O& o, const char* f, const char* l, Cx& cx)