diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b7d1862b..71ae8659 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -138,11 +138,21 @@ jobs:
         run: cargo +stable install rustfilt
       - name: Check x86_64 inlining
         run: |
-          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt
-          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt "--features public_imp"
-          RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx2.txt
           RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-avx2.txt --no-default-features
           RUSTFLAGS="-C target-feature=+sse4.2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-sse42.txt --no-default-features
+      - name: Check x86_64 inlining with avx2 autoselection
+        run: |
+          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-old.txt
+          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-old.txt "--features public_imp"
+          RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx2.txt
+        if: ${{ matrix.toolchain == '1.38.0' }}
+      - name: Check x86_64 inlining with avx512 autoselection
+        run: |
+          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt
+          ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt "--features public_imp"
+          RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx512.txt
+          RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-avx512.txt --no-default-features
+        if: ${{ matrix.toolchain != '1.38.0' }}
       - uses: dtolnay/rust-toolchain@master
         with:
           toolchain: ${{ matrix.toolchain }}
diff --git a/Cargo.toml b/Cargo.toml
index f3ae8206..eccd5c1a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -53,3 +53,6 @@ targets = ["aarch64-unknown-linux-gnu", "wasm32-unknown-unknown", "wasm32-wasip1
 
 [dependencies]
 flexpect = "0.1.1"
+
+[build-dependencies]
+rustversion = "1.0.22"
diff --git a/README.md b/README.md
index bcdd9d59..ede39012 100644
--- a/README.md
+++ b/README.md
@@ -13,11 +13,12 @@ This library has been thoroughly tested with sample data as well as fuzzing and
 ## Features
 * `basic` API for the fastest validation, optimized for valid UTF-8
 * `compat` API as a fully compatible replacement for `std::str::from_utf8()`
+* 🆕 AVX 512 support on modern x86/x86-64 CPUs since Rust 1.89
 * Supports AVX 2 and SSE 4.2 implementations on x86 and x86-64
 * ARM64 (aarch64) SIMD is supported since Rust 1.61
 * WASM (wasm32) SIMD is supported
 * 🆕 armv7 NEON support with the `armv7_neon` feature on nightly Rust
-* x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCI
+* x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCII
 * aarch64: Up to eleven times faster than the std library on valid non-ASCII, up to four times faster on ASCII (Apple Silicon)
 * Faster than the original simdjson implementation
 * Selects the fastest implementation at runtime based on CPU support (on x86)
@@ -71,14 +72,17 @@ This comes at a slight performance penalty compared to the `basic` API even if t
 ## Implementation selection
 
 ### X86
-The fastest implementation is selected at runtime using the `std::is_x86_feature_detected!` macro, unless the CPU
-targeted by the compiler supports the fastest available implementation.
-So if you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine, the AVX 2 implementation is selected at
-compile-time and runtime selection is disabled.
+The fastest implementation is usually selected at runtime using the `std::is_x86_feature_detected!` macro. The AVX 512 
+implementation is however only selected if the CPU support the VBMI2 features to avoid throttling happening with CPUs before 
+Intels Ice Lake microarchitecture.
+
+If you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine whichs support AVX 512 with Rust 1.89 or later,
+the AVX 512 implementation is selected at compile-time and runtime selection is disabled.
 
 For no-std support (compiled with `--no-default-features`) the implementation is always selected at compile time based on
 the targeted CPU. Use `RUSTFLAGS="-C target-feature=+avx2"` for the AVX 2 implementation or `RUSTFLAGS="-C target-feature=+sse4.2"`
-for the SSE 4.2 implementation.
+for the SSE 4.2 implementation. For AVX 512 use `RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2"` with 
+Rust 1.89 or later.
 
 ### ARM64
 The SIMD implementation is used automatically since Rust 1.61.
diff --git a/build.rs b/build.rs
new file mode 100644
index 00000000..ca6ed851
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,17 @@
+fn main() {
+    println!("cargo::rustc-check-cfg=cfg(avx512_stable)");
+    // `if rustversion::cfg!(...)` is not supported in older Rust versions
+    if avx512_stable() {
+        println!("cargo:rustc-cfg=avx512_stable");
+    }
+}
+
+#[rustversion::since(1.89)]
+fn avx512_stable() -> bool {
+    true
+}
+
+#[rustversion::before(1.89)]
+fn avx512_stable() -> bool {
+    false
+}
diff --git a/inlining/expected-methods-x86-nostd-avx512.txt b/inlining/expected-methods-x86-nostd-avx512.txt
new file mode 100644
index 00000000..643c0462
--- /dev/null
+++ b/inlining/expected-methods-x86-nostd-avx512.txt
@@ -0,0 +1,5 @@
+simdutf8::implementation::helpers::get_compat_error
+simdutf8::implementation::x86::validate_utf8_basic
+simdutf8::implementation::x86::validate_utf8_basic_avx512
+simdutf8::implementation::x86::validate_utf8_compat
+simdutf8::implementation::x86::validate_utf8_compat_avx512
diff --git a/inlining/expected-methods-x86-std-avx512.txt b/inlining/expected-methods-x86-std-avx512.txt
new file mode 100644
index 00000000..643c0462
--- /dev/null
+++ b/inlining/expected-methods-x86-std-avx512.txt
@@ -0,0 +1,5 @@
+simdutf8::implementation::helpers::get_compat_error
+simdutf8::implementation::x86::validate_utf8_basic
+simdutf8::implementation::x86::validate_utf8_basic_avx512
+simdutf8::implementation::x86::validate_utf8_compat
+simdutf8::implementation::x86::validate_utf8_compat_avx512
diff --git a/inlining/expected-methods-x86-std-old.txt b/inlining/expected-methods-x86-std-old.txt
new file mode 100644
index 00000000..4cbccc66
--- /dev/null
+++ b/inlining/expected-methods-x86-std-old.txt
@@ -0,0 +1,9 @@
+simdutf8::implementation::helpers::get_compat_error
+simdutf8::implementation::validate_utf8_basic_fallback
+simdutf8::implementation::validate_utf8_compat_fallback
+simdutf8::implementation::x86::avx2::validate_utf8_basic
+simdutf8::implementation::x86::avx2::validate_utf8_compat
+simdutf8::implementation::x86::sse42::validate_utf8_basic
+simdutf8::implementation::x86::sse42::validate_utf8_compat
+simdutf8::implementation::x86::validate_utf8_basic::get_fastest
+simdutf8::implementation::x86::validate_utf8_compat::get_fastest
diff --git a/inlining/expected-methods-x86-std.txt b/inlining/expected-methods-x86-std.txt
index 4cbccc66..6edecc76 100644
--- a/inlining/expected-methods-x86-std.txt
+++ b/inlining/expected-methods-x86-std.txt
@@ -3,6 +3,8 @@ simdutf8::implementation::validate_utf8_basic_fallback
 simdutf8::implementation::validate_utf8_compat_fallback
 simdutf8::implementation::x86::avx2::validate_utf8_basic
 simdutf8::implementation::x86::avx2::validate_utf8_compat
+simdutf8::implementation::x86::avx512::validate_utf8_basic
+simdutf8::implementation::x86::avx512::validate_utf8_compat
 simdutf8::implementation::x86::sse42::validate_utf8_basic
 simdutf8::implementation::x86::sse42::validate_utf8_compat
 simdutf8::implementation::x86::validate_utf8_basic::get_fastest
diff --git a/src/basic.rs b/src/basic.rs
index 23a72951..02623d8b 100644
--- a/src/basic.rs
+++ b/src/basic.rs
@@ -197,6 +197,16 @@ pub mod imp {
     /// Includes the x86/x86-64 SIMD implementations.
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
     pub mod x86 {
+        /// Includes the validation implementation for AVX 512-compatible CPUs.
+        ///
+        /// Using the provided functionality on CPUs which do not support AVX 512 is undefined
+        /// behavior and will very likely cause a crash.
+        #[cfg(avx512_stable)]
+        pub mod avx512 {
+            pub use crate::implementation::x86::avx512::validate_utf8_basic as validate_utf8;
+            pub use crate::implementation::x86::avx512::ChunkedUtf8ValidatorImp;
+            pub use crate::implementation::x86::avx512::Utf8ValidatorImp;
+        }
         /// Includes the validation implementation for AVX 2-compatible CPUs.
         ///
         /// Using the provided functionality on CPUs which do not support AVX 2 is undefined
diff --git a/src/compat.rs b/src/compat.rs
index dadb4378..d9482fc2 100644
--- a/src/compat.rs
+++ b/src/compat.rs
@@ -105,6 +105,11 @@ pub mod imp {
     /// Includes the x86/x86-64 SIMD implementations.
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
     pub mod x86 {
+        /// Includes the validation implementation for AVX 512-compatible CPUs.
+        #[cfg(avx512_stable)]
+        pub mod avx512 {
+            pub use crate::implementation::x86::avx512::validate_utf8_compat as validate_utf8;
+        }
         /// Includes the validation implementation for AVX 2-compatible CPUs.
         pub mod avx2 {
             pub use crate::implementation::x86::avx2::validate_utf8_compat as validate_utf8;
diff --git a/src/implementation/algorithm.rs b/src/implementation/algorithm.rs
index b0381aa7..68f7fad1 100644
--- a/src/implementation/algorithm.rs
+++ b/src/implementation/algorithm.rs
@@ -182,7 +182,10 @@ macro_rules! algorithm_simd {
             unsafe fn check_block(&mut self, input: SimdInput) {
                 // WORKAROUND
                 // necessary because the for loop is not unrolled on ARM64
-                if input.vals.len() == 2 {
+                if input.vals.len() == 1 {
+                    self.check_bytes(*input.vals.as_ptr());
+                    self.incomplete = Self::is_incomplete(*input.vals.as_ptr());
+                } else if input.vals.len() == 2 {
                     self.check_bytes(*input.vals.as_ptr());
                     self.check_bytes(*input.vals.as_ptr().add(1));
                     self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1));
@@ -237,13 +240,7 @@ macro_rules! algorithm_simd {
             }
 
             if idx < len {
-                let mut tmpbuf = TempSimdChunk::new();
-                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
-                    input.as_ptr().add(idx),
-                    tmpbuf.0.as_mut_ptr(),
-                    len - idx,
-                );
-                let simd_input = SimdInput::new(tmpbuf.0.as_ptr());
+                let simd_input = SimdInput::new_partial(input.as_ptr().add(idx), len-idx);
                 algorithm.check_utf8(simd_input);
             }
             algorithm.check_incomplete_pending();
@@ -329,14 +326,7 @@ macro_rules! algorithm_simd {
                 break;
             }
             if idx < len {
-                let mut tmpbuf = TempSimdChunk::new();
-                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
-                    input.as_ptr().add(idx),
-                    tmpbuf.0.as_mut_ptr(),
-                    len - idx,
-                );
-                let simd_input = SimdInput::new(tmpbuf.0.as_ptr());
-
+                let simd_input = SimdInput::new_partial(input.as_ptr().add(idx), len-idx);
                 algorithm.check_utf8(simd_input);
             }
             algorithm.check_incomplete_pending();
@@ -534,6 +524,18 @@ macro_rules! simd_input_128_bit {
                 }
             }
 
+            $(#[$feat])*
+            #[inline]
+            unsafe fn new_partial(ptr: *const u8, len: usize) -> Self {
+                let mut tmpbuf = TempSimdChunk::new();
+                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
+                    ptr,
+                    tmpbuf.0.as_mut_ptr(),
+                    len,
+                );
+                Self::new(tmpbuf.0.as_ptr())
+            }
+
             $(#[$feat])*
             #[inline]
             unsafe fn is_ascii(&self) -> bool {
@@ -565,6 +567,18 @@ macro_rules! simd_input_256_bit {
                 }
             }
 
+            $(#[$feat])*
+            #[inline]
+            unsafe fn new_partial(ptr: *const u8, len: usize) -> Self {
+                let mut tmpbuf = TempSimdChunk::new();
+                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
+                    ptr,
+                    tmpbuf.0.as_mut_ptr(),
+                    len,
+                );
+                Self::new(tmpbuf.0.as_ptr())
+            }
+
             $(#[$feat])*
             #[inline]
             unsafe fn is_ascii(&self) -> bool {
@@ -573,3 +587,41 @@ macro_rules! simd_input_256_bit {
         }
     };
 }
+
+macro_rules! simd_input_512_bit {
+    ($(#[$feat:meta])*) => {
+        #[repr(C)]
+        struct SimdInput {
+            vals: [SimdU8Value; 1],
+        }
+
+        impl SimdInput {
+            $(#[$feat])*
+            #[inline]
+            unsafe fn new(ptr: *const u8) -> Self {
+                Self {
+                    vals: [
+                        SimdU8Value::load_from(ptr),
+                    ],
+                }
+            }
+
+
+            $(#[$feat])*
+            #[inline]
+            unsafe fn new_partial(ptr: *const u8, len: usize) -> Self {
+                Self {
+                    vals: [
+                        SimdU8Value::load_from_partial(ptr, len),
+                    ],
+                }
+            }
+
+            $(#[$feat])*
+            #[inline]
+            unsafe fn is_ascii(&self) -> bool {
+                self.vals[0].is_ascii()
+            }
+        }
+    };
+}
diff --git a/src/implementation/helpers.rs b/src/implementation/helpers.rs
index 6fa0f18b..7796750f 100644
--- a/src/implementation/helpers.rs
+++ b/src/implementation/helpers.rs
@@ -139,6 +139,10 @@ impl TempSimdChunkA16 {
 #[allow(dead_code)] // only used if a 256-bit SIMD implementation is used
 pub(crate) struct TempSimdChunkA32(pub(crate) [u8; SIMD_CHUNK_SIZE]);
 
+#[repr(C, align(64))]
+#[allow(dead_code)] // only used if a 256-bit SIMD implementation is used
+pub(crate) struct TempSimdChunkA64(pub(crate) [u8; SIMD_CHUNK_SIZE]);
+
 #[allow(dead_code)] // only used if there is a SIMD implementation
 impl TempSimdChunkA32 {
     #[flexpect::e(clippy::inline_always)]
@@ -148,6 +152,15 @@ impl TempSimdChunkA32 {
     }
 }
 
+#[allow(dead_code)] // only used if there is a SIMD implementation
+impl TempSimdChunkA64 {
+    #[flexpect::e(clippy::inline_always)]
+    #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo
+    pub(crate) const fn new() -> Self {
+        Self([0; SIMD_CHUNK_SIZE])
+    }
+}
+
 #[derive(Clone, Copy)]
 #[allow(dead_code)] // only used if there is a SIMD implementation
 pub(crate) struct SimdU8Value<T>(pub(crate) T)
diff --git a/src/implementation/x86/avx512.rs b/src/implementation/x86/avx512.rs
new file mode 100644
index 00000000..f3b69a9e
--- /dev/null
+++ b/src/implementation/x86/avx512.rs
@@ -0,0 +1,276 @@
+//! Contains the x86-64 AVX512 UTF-8 validation implementation.
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::{
+    __m512i, _mm512_alignr_epi8, _mm512_and_si512, _mm512_loadu_si512, _mm512_maskz_loadu_epi8,
+    _mm512_movepi8_mask, _mm512_or_si512, _mm512_permutex2var_epi64, _mm512_set1_epi8,
+    _mm512_set_epi64, _mm512_set_epi8, _mm512_setzero_si512, _mm512_shuffle_epi8,
+    _mm512_srli_epi16, _mm512_subs_epu8, _mm512_test_epi8_mask, _mm512_xor_si512, _mm_prefetch,
+    _MM_HINT_T0,
+};
+
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::{
+    __m512i, _mm512_alignr_epi8, _mm512_and_si512, _mm512_loadu_si512, _mm512_maskz_loadu_epi8,
+    _mm512_movepi8_mask, _mm512_or_si512, _mm512_permutex2var_epi64, _mm512_set1_epi8,
+    _mm512_set_epi64, _mm512_set_epi8, _mm512_setzero_si512, _mm512_shuffle_epi8,
+    _mm512_srli_epi16, _mm512_subs_epu8, _mm512_test_epi8_mask, _mm512_xor_si512, _mm_prefetch,
+    _MM_HINT_T0,
+};
+
+use crate::implementation::helpers::Utf8CheckAlgorithm;
+
+// AVX 2 SIMD primitives
+
+type SimdU8Value = crate::implementation::helpers::SimdU8Value<__m512i>;
+
+impl SimdU8Value {
+    #[flexpect::e(clippy::cast_possible_wrap)]
+    #[flexpect::e(clippy::too_many_arguments)]
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn from_32_cut_off_leading(
+        v0: u8,
+        v1: u8,
+        v2: u8,
+        v3: u8,
+        v4: u8,
+        v5: u8,
+        v6: u8,
+        v7: u8,
+        v8: u8,
+        v9: u8,
+        v10: u8,
+        v11: u8,
+        v12: u8,
+        v13: u8,
+        v14: u8,
+        v15: u8,
+        v16: u8,
+        v17: u8,
+        v18: u8,
+        v19: u8,
+        v20: u8,
+        v21: u8,
+        v22: u8,
+        v23: u8,
+        v24: u8,
+        v25: u8,
+        v26: u8,
+        v27: u8,
+        v28: u8,
+        v29: u8,
+        v30: u8,
+        v31: u8,
+    ) -> Self {
+        Self::from(_mm512_set_epi8(
+            v31 as i8, v30 as i8, v29 as i8, v28 as i8, v27 as i8, v26 as i8, v25 as i8, v24 as i8,
+            v23 as i8, v22 as i8, v21 as i8, v20 as i8, v19 as i8, v18 as i8, v17 as i8, v16 as i8,
+            v15 as i8, v14 as i8, v13 as i8, v12 as i8, v11 as i8, v10 as i8, v9 as i8, v8 as i8,
+            v7 as i8, v6 as i8, v5 as i8, v4 as i8, v3 as i8, v2 as i8, v1 as i8, v0 as i8,
+            v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8,
+            v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8,
+            v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8,
+            v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8, v0 as i8,
+        ))
+    }
+
+    #[flexpect::e(clippy::too_many_arguments)]
+    #[flexpect::e(clippy::cast_possible_wrap)]
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn repeat_16(
+        v0: u8,
+        v1: u8,
+        v2: u8,
+        v3: u8,
+        v4: u8,
+        v5: u8,
+        v6: u8,
+        v7: u8,
+        v8: u8,
+        v9: u8,
+        v10: u8,
+        v11: u8,
+        v12: u8,
+        v13: u8,
+        v14: u8,
+        v15: u8,
+    ) -> Self {
+        Self::from(_mm512_set_epi8(
+            v15 as i8, v14 as i8, v13 as i8, v12 as i8, v11 as i8, v10 as i8, v9 as i8, v8 as i8,
+            v7 as i8, v6 as i8, v5 as i8, v4 as i8, v3 as i8, v2 as i8, v1 as i8, v0 as i8,
+            v15 as i8, v14 as i8, v13 as i8, v12 as i8, v11 as i8, v10 as i8, v9 as i8, v8 as i8,
+            v7 as i8, v6 as i8, v5 as i8, v4 as i8, v3 as i8, v2 as i8, v1 as i8, v0 as i8,
+            v15 as i8, v14 as i8, v13 as i8, v12 as i8, v11 as i8, v10 as i8, v9 as i8, v8 as i8,
+            v7 as i8, v6 as i8, v5 as i8, v4 as i8, v3 as i8, v2 as i8, v1 as i8, v0 as i8,
+            v15 as i8, v14 as i8, v13 as i8, v12 as i8, v11 as i8, v10 as i8, v9 as i8, v8 as i8,
+            v7 as i8, v6 as i8, v5 as i8, v4 as i8, v3 as i8, v2 as i8, v1 as i8, v0 as i8,
+        ))
+    }
+
+    #[flexpect::e(clippy::cast_ptr_alignment)]
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn load_from(ptr: *const u8) -> Self {
+        Self::from(_mm512_loadu_si512(ptr.cast::<__m512i>()))
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn load_from_partial(ptr: *const u8, len: usize) -> Self {
+        let res = _mm512_maskz_loadu_epi8(u64::MAX >> (64 - len), ptr.cast::<i8>());
+        Self::from(res)
+    }
+
+    #[flexpect::e(clippy::too_many_arguments)]
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn lookup_16(
+        self,
+        v0: u8,
+        v1: u8,
+        v2: u8,
+        v3: u8,
+        v4: u8,
+        v5: u8,
+        v6: u8,
+        v7: u8,
+        v8: u8,
+        v9: u8,
+        v10: u8,
+        v11: u8,
+        v12: u8,
+        v13: u8,
+        v14: u8,
+        v15: u8,
+    ) -> Self {
+        Self::from(_mm512_shuffle_epi8(
+            Self::repeat_16(
+                v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
+            )
+            .0,
+            self.0,
+        ))
+    }
+
+    #[flexpect::e(clippy::cast_possible_wrap)]
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn splat(val: u8) -> Self {
+        Self::from(_mm512_set1_epi8(val as i8))
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn splat0() -> Self {
+        Self::from(_mm512_setzero_si512())
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn or(self, b: Self) -> Self {
+        Self::from(_mm512_or_si512(self.0, b.0))
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn and(self, b: Self) -> Self {
+        Self::from(_mm512_and_si512(self.0, b.0))
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn xor(self, b: Self) -> Self {
+        Self::from(_mm512_xor_si512(self.0, b.0))
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn saturating_sub(self, b: Self) -> Self {
+        Self::from(_mm512_subs_epu8(self.0, b.0))
+    }
+
+    // ugly but shr<N> requires const generics
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn shr4(self) -> Self {
+        Self::from(_mm512_srli_epi16(self.0, 4)).and(Self::splat(0xFF >> 4))
+    }
+
+    // ugly but prev<N> requires const generics
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn prev1(self, prev: Self) -> Self {
+        const SHIFT: i32 = 16 - 1;
+        return Self::from(_mm512_alignr_epi8(
+            self.0,
+            _mm512_permutex2var_epi64(prev.0, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), self.0),
+            SHIFT,
+        ));
+    }
+    // ugly but prev<N> requires const generics
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn prev2(self, prev: Self) -> Self {
+        const SHIFT: i32 = 16 - 2;
+        return Self::from(_mm512_alignr_epi8(
+            self.0,
+            _mm512_permutex2var_epi64(prev.0, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), self.0),
+            SHIFT,
+        ));
+    }
+
+    // ugly but prev<N> requires const generics
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn prev3(self, prev: Self) -> Self {
+        const SHIFT: i32 = 16 - 3;
+        return Self::from(_mm512_alignr_epi8(
+            self.0,
+            _mm512_permutex2var_epi64(prev.0, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), self.0),
+            SHIFT,
+        ));
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn any_bit_set(self) -> bool {
+        _mm512_test_epi8_mask(self.0, self.0) != 0
+    }
+
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn is_ascii(self) -> bool {
+        _mm512_movepi8_mask(self.0) == 0
+    }
+}
+
+impl From<__m512i> for SimdU8Value {
+    #[inline]
+    fn from(val: __m512i) -> Self {
+        Self(val)
+    }
+}
+
+impl Utf8CheckAlgorithm<SimdU8Value> {
+    #[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+    #[inline]
+    unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
+        let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
+        let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
+        is_third_byte.or(is_fourth_byte)
+    }
+}
+
+#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
+#[inline]
+unsafe fn simd_prefetch(ptr: *const u8) {
+    _mm_prefetch(ptr.cast::<i8>(), _MM_HINT_T0);
+}
+
+const PREFETCH: bool = true;
+#[cfg(feature = "public_imp")]
+use crate::implementation::helpers::TempSimdChunkA64 as TempSimdChunk;
+simd_input_512_bit!(#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]);
+algorithm_simd!(#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]);
diff --git a/src/implementation/x86/mod.rs b/src/implementation/x86/mod.rs
index 23b7515e..cd279e37 100644
--- a/src/implementation/x86/mod.rs
+++ b/src/implementation/x86/mod.rs
@@ -1,11 +1,69 @@
-#[cfg(any(feature = "std", feature = "public_imp", target_feature = "avx2"))]
+#[cfg(all(
+    avx512_stable,
+    any(
+    feature = "public_imp",
+    // always availabe, except if no-std and no avx512 support
+    feature = "std",
+    all(
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )
+)))]
+pub(crate) mod avx512;
+
+#[cfg(any(
+    feature = "public_imp",
+    // std: sse 4.2 is available for auto-selection unless avx512 is selected at compile time
+    all(
+        feature = "std",
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        ))
+    ),
+    // no-std: no avx512 -> select avx2
+    all(
+        not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
+        target_feature = "avx2"
+    )
+))]
 pub(crate) mod avx2;
 
 #[cfg(any(
     feature = "public_imp",
-    all(feature = "std", not(target_feature = "avx2")),
+    // std: sse 4.2 is available for auto-selection unless avx512 or avx2 are selected at compile time
+    all(
+        feature = "std",
+            not(any(all(
+                avx512_stable,
+                target_feature = "avx512f",
+                target_feature = "avx512bw",
+                target_feature = "avx512vbmi",
+                target_feature = "avx512vbmi2"
+            ),all(not(avx512_stable), target_feature = "avx2"))),
+    ),
+    // no-std: no avx512, no avx2 -> select sse4.2
     all(
         not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
         not(target_feature = "avx2"),
         target_feature = "sse4.2"
     )
@@ -14,7 +72,17 @@ pub(crate) mod sse42;
 
 // validate_utf8_basic() std: implementation auto-selection
 
-#[cfg(all(feature = "std", not(target_feature = "avx2")))]
+#[cfg(all(
+    feature = "std",
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
+    not(all(not(avx512_stable), target_feature = "avx2"))
+))]
 #[inline]
 pub(crate) unsafe fn validate_utf8_basic(
     input: &[u8],
@@ -26,9 +94,32 @@ pub(crate) unsafe fn validate_utf8_basic(
     type FnRaw = *mut ();
     type ValidateUtf8Fn = unsafe fn(input: &[u8]) -> Result<(), crate::basic::Utf8Error>;
 
+    #[cfg(avx512_stable)]
+    #[inline]
+    fn get_avx512_implementation() -> Option<ValidateUtf8Fn> {
+        // Test for avx512vbmi2 to make sure we have a newer CPU with a non-throttling AVX-512 implementation
+        if std::is_x86_feature_detected!("avx512f")
+            && std::is_x86_feature_detected!("avx512bw")
+            && std::is_x86_feature_detected!("avx512vbmi")
+            && std::is_x86_feature_detected!("avx512vbmi2")
+        {
+            return Some(avx512::validate_utf8_basic);
+        }
+        None
+    }
+
+    #[cfg(not(avx512_stable))]
+    #[inline]
+    fn get_avx512_implementation() -> Option<ValidateUtf8Fn> {
+        None
+    }
+
+    #[flexpect::e(clippy::option_if_let_else)]
     #[inline]
     fn get_fastest_available_implementation_basic() -> ValidateUtf8Fn {
-        if std::is_x86_feature_detected!("avx2") {
+        if let Some(fun) = get_avx512_implementation() {
+            fun
+        } else if std::is_x86_feature_detected!("avx2") {
             avx2::validate_utf8_basic
         } else if std::is_x86_feature_detected!("sse4.2") {
             sse42::validate_utf8_basic
@@ -55,7 +146,51 @@ pub(crate) unsafe fn validate_utf8_basic(
 
 // validate_utf8_basic() no-std: implementation selection by config
 
-#[cfg(target_feature = "avx2")]
+#[cfg(all(
+    avx512_stable,
+    target_feature = "avx512f",
+    target_feature = "avx512bw",
+    target_feature = "avx512vbmi",
+    target_feature = "avx512vbmi2"
+))]
+pub(crate) unsafe fn validate_utf8_basic(
+    input: &[u8],
+) -> core::result::Result<(), crate::basic::Utf8Error> {
+    if input.len() < super::helpers::SIMD_CHUNK_SIZE {
+        return super::validate_utf8_basic_fallback(input);
+    }
+
+    validate_utf8_basic_avx512(input)
+}
+
+#[cfg(all(
+    avx512_stable,
+    target_feature = "avx512f",
+    target_feature = "avx512bw",
+    target_feature = "avx512vbmi",
+    target_feature = "avx512vbmi2"
+))]
+#[inline(never)]
+unsafe fn validate_utf8_basic_avx512(
+    input: &[u8],
+) -> core::result::Result<(), crate::basic::Utf8Error> {
+    avx512::validate_utf8_basic(input)
+}
+
+#[cfg(any(
+    all(
+        not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
+        target_feature = "avx2"
+    ),
+    all(target_feature = "avx2", feature = "std", not(avx512_stable))
+))]
 pub(crate) unsafe fn validate_utf8_basic(
     input: &[u8],
 ) -> core::result::Result<(), crate::basic::Utf8Error> {
@@ -66,7 +201,20 @@ pub(crate) unsafe fn validate_utf8_basic(
     validate_utf8_basic_avx2(input)
 }
 
-#[cfg(target_feature = "avx2")]
+#[cfg(any(
+    all(
+        not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
+        target_feature = "avx2"
+    ),
+    all(target_feature = "avx2", feature = "std", not(avx512_stable))
+))]
 #[inline(never)]
 unsafe fn validate_utf8_basic_avx2(
     input: &[u8],
@@ -76,6 +224,13 @@ unsafe fn validate_utf8_basic_avx2(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     target_feature = "sse4.2"
 ))]
@@ -91,6 +246,13 @@ pub(crate) unsafe fn validate_utf8_basic(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     target_feature = "sse4.2"
 ))]
@@ -103,6 +265,13 @@ unsafe fn validate_utf8_basic_sse42(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     not(target_feature = "sse4.2")
 ))]
@@ -110,7 +279,17 @@ pub(crate) use super::validate_utf8_basic_fallback as validate_utf8_basic;
 
 // validate_utf8_compat() std: implementation auto-selection
 
-#[cfg(all(feature = "std", not(target_feature = "avx2")))]
+#[cfg(all(
+    feature = "std",
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
+    not(all(not(avx512_stable), target_feature = "avx2"))
+))]
 #[inline]
 pub(crate) unsafe fn validate_utf8_compat(
     input: &[u8],
@@ -122,9 +301,32 @@ pub(crate) unsafe fn validate_utf8_compat(
     type FnRaw = *mut ();
     type ValidateUtf8CompatFn = unsafe fn(input: &[u8]) -> Result<(), crate::compat::Utf8Error>;
 
+    #[cfg(avx512_stable)]
+    #[inline]
+    fn get_avx512_implementation() -> Option<ValidateUtf8CompatFn> {
+        // Test for avx512vbmi2 to make sure we have a newer CPU with a non-throttling AVX-512 implementation
+        if std::is_x86_feature_detected!("avx512f")
+            && std::is_x86_feature_detected!("avx512bw")
+            && std::is_x86_feature_detected!("avx512vbmi")
+            && std::is_x86_feature_detected!("avx512vbmi2")
+        {
+            return Some(avx512::validate_utf8_compat);
+        }
+        None
+    }
+
+    #[cfg(not(avx512_stable))]
+    #[inline]
+    fn get_avx512_implementation() -> Option<ValidateUtf8CompatFn> {
+        None
+    }
+
+    #[flexpect::e(clippy::option_if_let_else)]
     #[inline]
     fn get_fastest_available_implementation_compat() -> ValidateUtf8CompatFn {
-        if std::is_x86_feature_detected!("avx2") {
+        if let Some(fun) = get_avx512_implementation() {
+            fun
+        } else if std::is_x86_feature_detected!("avx2") {
             avx2::validate_utf8_compat
         } else if std::is_x86_feature_detected!("sse4.2") {
             sse42::validate_utf8_compat
@@ -151,7 +353,51 @@ pub(crate) unsafe fn validate_utf8_compat(
 
 // validate_utf8_basic() no-std: implementation selection by config
 
-#[cfg(target_feature = "avx2")]
+#[cfg(all(
+    avx512_stable,
+    target_feature = "avx512f",
+    target_feature = "avx512bw",
+    target_feature = "avx512vbmi",
+    target_feature = "avx512vbmi2"
+))]
+pub(crate) unsafe fn validate_utf8_compat(
+    input: &[u8],
+) -> core::result::Result<(), crate::compat::Utf8Error> {
+    if input.len() < super::helpers::SIMD_CHUNK_SIZE {
+        return super::validate_utf8_compat_fallback(input);
+    }
+
+    validate_utf8_compat_avx512(input)
+}
+
+#[cfg(all(
+    avx512_stable,
+    target_feature = "avx512f",
+    target_feature = "avx512bw",
+    target_feature = "avx512vbmi",
+    target_feature = "avx512vbmi2"
+))]
+#[inline(never)]
+unsafe fn validate_utf8_compat_avx512(
+    input: &[u8],
+) -> core::result::Result<(), crate::compat::Utf8Error> {
+    avx512::validate_utf8_compat(input)
+}
+
+#[cfg(any(
+    all(
+        not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
+        target_feature = "avx2"
+    ),
+    all(target_feature = "avx2", feature = "std", not(avx512_stable))
+))]
 pub(crate) unsafe fn validate_utf8_compat(
     input: &[u8],
 ) -> core::result::Result<(), crate::compat::Utf8Error> {
@@ -162,7 +408,20 @@ pub(crate) unsafe fn validate_utf8_compat(
     validate_utf8_compat_avx2(input)
 }
 
-#[cfg(target_feature = "avx2")]
+#[cfg(any(
+    all(
+        not(feature = "std"),
+        not(all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )),
+        target_feature = "avx2"
+    ),
+    all(target_feature = "avx2", feature = "std", not(avx512_stable))
+))]
 #[inline(never)]
 unsafe fn validate_utf8_compat_avx2(
     input: &[u8],
@@ -172,6 +431,13 @@ unsafe fn validate_utf8_compat_avx2(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     target_feature = "sse4.2"
 ))]
@@ -187,6 +453,13 @@ pub(crate) unsafe fn validate_utf8_compat(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     target_feature = "sse4.2"
 ))]
@@ -199,6 +472,13 @@ pub(crate) unsafe fn validate_utf8_compat_sse42(
 
 #[cfg(all(
     not(feature = "std"),
+    not(all(
+        avx512_stable,
+        target_feature = "avx512f",
+        target_feature = "avx512bw",
+        target_feature = "avx512vbmi",
+        target_feature = "avx512vbmi2"
+    )),
     not(target_feature = "avx2"),
     not(target_feature = "sse4.2")
 ))]
diff --git a/src/lib.rs b/src/lib.rs
index 1f05450c..0ef442fa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -95,14 +95,17 @@
 //! ## Implementation selection
 //!
 //! ### X86
-//! The fastest implementation is selected at runtime using the `std::is_x86_feature_detected!` macro, unless the CPU
-//! targeted by the compiler supports the fastest available implementation.
-//! So if you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine, the AVX 2 implementation is selected at
-//! compile-time and runtime selection is disabled.
+//! The fastest implementation is usually selected at runtime using the `std::is_x86_feature_detected!` macro. The AVX 512
+//! implementation is however only selected if the CPU support the VBMI2 features to avoid throttling happening with CPUs before
+//! Intels Ice Lake microarchitecture.
+//!
+//! If you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine whichs support AVX 512 with Rust 1.89 or later,
+//! the AVX 512 implementation is selected at compile-time and runtime selection is disabled.
 //!
 //! For no-std support (compiled with `--no-default-features`) the implementation is always selected at compile time based on
 //! the targeted CPU. Use `RUSTFLAGS="-C target-feature=+avx2"` for the AVX 2 implementation or `RUSTFLAGS="-C target-feature=+sse4.2"`
-//! for the SSE 4.2 implementation.
+//! for the SSE 4.2 implementation. For AVX 512 use `RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2"` with
+//! Rust 1.89 or later.
 //!
 //! ### ARM64
 //! The SIMD implementation is used automatically since Rust 1.61.
diff --git a/tests/tests.rs b/tests/tests.rs
index 082746c9..57431651 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -69,6 +69,23 @@ mod public_imp {
     #[allow(unused_variables)] // nothing to do if not SIMD implementation is available
     pub(super) fn test_valid(input: &[u8]) {
         if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
+            #[cfg(all(
+                avx512_stable,
+                target_feature = "avx512f",
+                target_feature = "avx512bw",
+                target_feature = "avx512vbmi",
+                target_feature = "avx512vbmi2"
+            ))]
+            unsafe {
+                assert!(simdutf8::basic::imp::x86::avx512::validate_utf8(input).is_ok());
+                assert!(simdutf8::compat::imp::x86::avx512::validate_utf8(input).is_ok());
+
+                test_streaming::<simdutf8::basic::imp::x86::avx512::Utf8ValidatorImp>(input, true);
+                test_chunked_streaming::<simdutf8::basic::imp::x86::avx512::ChunkedUtf8ValidatorImp>(
+                    input, true,
+                );
+            }
+
             #[cfg(target_feature = "avx2")]
             unsafe {
                 assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_ok());
@@ -138,6 +155,24 @@ mod public_imp {
     #[allow(unused_variables)] // nothing to do if not SIMD implementation is available
     pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option<usize>) {
         if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
+            #[cfg(all(
+                avx512_stable,
+                target_feature = "avx512f",
+                target_feature = "avx512bw",
+                target_feature = "avx512vbmi",
+                target_feature = "avx512vbmi2"
+            ))]
+            unsafe {
+                assert!(simdutf8::basic::imp::x86::avx512::validate_utf8(input).is_err());
+                let err = simdutf8::compat::imp::x86::avx512::validate_utf8(input).unwrap_err();
+                assert_eq!(err.valid_up_to(), valid_up_to);
+                assert_eq!(err.error_len(), error_len);
+
+                test_streaming::<simdutf8::basic::imp::x86::avx512::Utf8ValidatorImp>(input, false);
+                test_chunked_streaming::<simdutf8::basic::imp::x86::avx512::ChunkedUtf8ValidatorImp>(
+                    input, false,
+                );
+            }
             #[cfg(target_feature = "avx2")]
             unsafe {
                 assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_err());
@@ -268,6 +303,24 @@ mod public_imp {
         }
     }
 
+    #[test]
+    #[should_panic]
+    #[cfg(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        all(
+            avx512_stable,
+            target_feature = "avx512f",
+            target_feature = "avx512bw",
+            target_feature = "avx512vbmi",
+            target_feature = "avx512vbmi2"
+        )
+    ))]
+    fn test_avx2_chunked_panic() {
+        test_chunked_streaming_with_chunk_size::<
+            simdutf8::basic::imp::x86::avx512::ChunkedUtf8ValidatorImp,
+        >(b"abcd", 1, true);
+    }
+
     #[test]
     #[should_panic]
     #[cfg(all(