Skip to content

Commit

Permalink
fix: clmul aarch64 (#74)
Browse files: browse the repository at this point in the history
* fix clmul aarch64

* remove clmul_reuse

* remove references to deprecated features

* document rustflags

* fix import in test
  • Loading branch information
sinui0 authored Oct 9, 2023
1 parent 142d4c4 commit 59e78e0
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 262 deletions.
6 changes: 1 addition & 5 deletions clmul/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"

[dependencies]
cfg-if.workspace = true
bytemuck = {workspace = true, features = ["derive"]}
bytemuck = { workspace = true, features = ["derive"] }

[target.'cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))'.dependencies]
cpufeatures.workspace = true
Expand All @@ -17,10 +17,6 @@ rand.workspace = true
rand_core.workspace = true
criterion.workspace = true

[features]
armv8 = [] # Enable nightly-only ARMv8 intrinsics support
force-soft = [] # Disable support for hardware intrinsics

[[bench]]
name = "clmul"
harness = false
8 changes: 2 additions & 6 deletions clmul/benches/clmul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,15 @@ fn criterion_benchmark(c: &mut Criterion) {
let mut rng = ChaCha12Rng::seed_from_u64(0);
let a: [u8; 16] = rng.gen();
let b: [u8; 16] = rng.gen();
let mut a = Clmul::new(&a);
let mut b = Clmul::new(&b);
let a = Clmul::new(&a);
let b = Clmul::new(&b);

c.bench_function("clmul", move |bench| {
bench.iter(|| {
black_box(a.clmul(b));
});
});

c.bench_function("clmul_reuse", move |bench| {
bench.iter(|| a.clmul_reuse(&mut b));
});

c.bench_function("reduce", move |bench| {
bench.iter(|| black_box(Clmul::reduce_gcm(a, b)));
});
Expand Down
252 changes: 14 additions & 238 deletions clmul/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//! they are unavailable.
use cfg_if::cfg_if;
use core::ops::{BitXor, BitXorAssign};

#[allow(clippy::duplicate_mod)]
#[cfg_attr(not(target_pointer_width = "64"), path = "backend/soft32.rs")]
Expand Down Expand Up @@ -43,243 +42,20 @@ impl soft::Clmul {
}

cfg_if! {
if #[cfg(all(target_arch = "aarch64", feature = "armv8"))] {
#[path = "backend/pmull.rs"]
if #[cfg(all(target_arch = "aarch64", clmul_armv8, not(clmul_force_soft)))] {
mod autodetect;
mod pmull;
use pmull as intrinsics;
cpufeatures::new!(mul_intrinsics, "aes"); // `aes` implies PMULL
} else if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] {
#[path = "backend/clmul.rs"]
mod clmul_intr;
use clmul_intr as intrinsics;
cpufeatures::new!(mul_intrinsics, "pclmulqdq");
pub use crate::backend::autodetect::Clmul;
} else if #[cfg(
all(
any(target_arch = "x86_64", target_arch = "x86"),
not(clmul_force_soft)
)
)] {
mod autodetect;
mod clmul;
pub use crate::backend::autodetect::Clmul;
} else {
pub use crate::backend::soft::Clmul;
}
}

#[derive(Clone, Copy)]
/// Carryless multiplication operand.
///
/// The value is stored in either the hardware-intrinsics or the software
/// representation; `token.get_intr()` is consulted on every operation to
/// decide which of the two `inner` union fields is read.
pub struct Clmul {
/// Backend-specific value; `new` writes exactly one of the union's fields.
inner: Inner,
/// Token obtained from `mul_intrinsics::init_get()` when the value is built.
token: mul_intrinsics::InitToken,
}

#[derive(Clone, Copy)]
/// Untagged storage for the value held by `Clmul`.
///
/// The union itself does not record which field is live; users select the
/// field from the accompanying `InitToken` (via `get_intr()`).
union Inner {
/// Representation used by the hardware-intrinsics backend.
intrinsics: intrinsics::Clmul,
/// Representation used by the software backend.
soft: soft::Clmul,
}

impl mul_intrinsics::InitToken {
    /// Reports whether the hardware-intrinsics backend should be used.
    ///
    /// Always `false` when the crate is built with the `force-soft` feature;
    /// otherwise this is whatever the token's own `get()` returns.
    #[inline(always)]
    fn get_intr(&self) -> bool {
        if cfg!(feature = "force-soft") {
            // Compile-time override: never touch the intrinsics backend.
            return false;
        }
        self.get()
    }
}

impl core::fmt::Debug for Clmul {
    /// Formats the value using the representation of whichever backend is active.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let use_intrinsics = self.token.get_intr();
        // SAFETY: `Clmul::new` writes the `intrinsics` field only on the
        // hardware path and `soft` otherwise. This presumes the token's `get()`
        // agrees with what `init_get()` reported at construction.
        unsafe {
            match use_intrinsics {
                true => self.inner.intrinsics.fmt(f),
                false => self.inner.soft.fmt(f),
            }
        }
    }
}

impl Clmul {
    /// Builds an operand from a 16-byte block.
    ///
    /// The hardware representation is chosen only when the `force-soft`
    /// feature is disabled and `mul_intrinsics::init_get()` reports that the
    /// required CPU feature is present; otherwise the software one is used.
    pub fn new(h: &[u8; 16]) -> Self {
        let (token, has_intrinsics) = mul_intrinsics::init_get();

        let use_intrinsics = !cfg!(feature = "force-soft") && has_intrinsics;
        let inner = match use_intrinsics {
            true => Inner {
                intrinsics: intrinsics::Clmul::new(h),
            },
            false => Inner {
                soft: soft::Clmul::new(h),
            },
        };

        Self { inner, token }
    }

    /// Performs carryless multiplication of `self` by `x`.
    ///
    /// Returns the two halves of the product in the order the active backend
    /// yields them. The first result carries `self`'s token, the second `x`'s.
    #[inline]
    pub fn clmul(self, x: Self) -> (Self, Self) {
        // SAFETY: the union field read in each branch is the one `new` wrote
        // for that backend (presuming the token's `get()` is consistent with
        // the `init_get()` result seen at construction).
        let (first, second) = unsafe {
            match self.token.get_intr() {
                true => {
                    let (r0, r1) = self.inner.intrinsics.clmul(x.inner.intrinsics);
                    (Inner { intrinsics: r0 }, Inner { intrinsics: r1 })
                }
                false => {
                    let (r0, r1) = self.inner.soft.clmul(x.inner.soft);
                    (Inner { soft: r0 }, Inner { soft: r1 })
                }
            }
        };

        (
            Self {
                inner: first,
                token: self.token,
            },
            Self {
                inner: second,
                token: x.token,
            },
        )
    }

    /// Performs carryless multiplication in place.
    ///
    /// Same computation as `clmul()`, but the operands are overwritten with
    /// the result instead of new objects being created: the high bits end up
    /// in `self`, the low bits in `x`.
    #[inline]
    pub fn clmul_reuse(&mut self, x: &mut Self) {
        // SAFETY: see `clmul`; the field accessed matches the active backend.
        unsafe {
            match self.token.get_intr() {
                true => {
                    let (r0, r1) = self.inner.intrinsics.clmul(x.inner.intrinsics);
                    self.inner.intrinsics = r0;
                    x.inner.intrinsics = r1;
                }
                false => {
                    let (r0, r1) = self.inner.soft.clmul(x.inner.soft);
                    self.inner.soft = r0;
                    x.inner.soft = r1;
                }
            }
        }
    }

    /// Reduces a polynomial modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1.
    ///
    /// `x` holds the upper bits and `y` the lower bits of the polynomial to
    /// reduce. The backend is selected from `x`'s token, which the result keeps.
    #[inline]
    pub fn reduce_gcm(x: Self, y: Self) -> Self {
        // SAFETY: see `clmul`; the field accessed matches the active backend.
        let inner = unsafe {
            match x.token.get_intr() {
                true => Inner {
                    intrinsics: intrinsics::Clmul::reduce_gcm(
                        x.inner.intrinsics,
                        y.inner.intrinsics,
                    ),
                },
                false => Inner {
                    soft: soft::Clmul::reduce_gcm(x.inner.soft, y.inner.soft),
                },
            }
        };

        Self {
            inner,
            token: x.token,
        }
    }
}

impl From<Clmul> for [u8; 16] {
    /// Converts the value to bytes through the active backend's own conversion.
    #[inline]
    fn from(m: Clmul) -> [u8; 16] {
        // SAFETY: `Clmul::new` writes `intrinsics` only on the hardware path
        // and `soft` otherwise, so the field read here is the initialized one
        // (presuming the token's `get()` stays consistent with construction).
        unsafe {
            match m.token.get_intr() {
                true => m.inner.intrinsics.into(),
                false => m.inner.soft.into(),
            }
        }
    }
}

impl BitXor for Clmul {
    type Output = Self;

    /// XORs two values using the backend selected by `self`'s token; the
    /// result keeps that token.
    #[inline]
    fn bitxor(self, other: Self) -> Self::Output {
        // SAFETY: `Clmul::new` writes `intrinsics` only on the hardware path
        // and `soft` otherwise, so the fields read here are the initialized
        // ones (presuming both operands were built under the same detection).
        let inner = unsafe {
            match self.token.get_intr() {
                true => Inner {
                    intrinsics: self.inner.intrinsics ^ other.inner.intrinsics,
                },
                false => Inner {
                    soft: self.inner.soft ^ other.inner.soft,
                },
            }
        };

        Self {
            inner,
            token: self.token,
        }
    }
}

impl BitXorAssign for Clmul {
    /// XORs `other` into `self` in place, using the backend selected by
    /// `self`'s token.
    #[inline]
    fn bitxor_assign(&mut self, other: Self) {
        // SAFETY: `Clmul::new` writes `intrinsics` only on the hardware path
        // and `soft` otherwise, so the fields read here are the initialized
        // ones (presuming both operands were built under the same detection).
        unsafe {
            match self.token.get_intr() {
                true => {
                    self.inner.intrinsics = self.inner.intrinsics ^ other.inner.intrinsics;
                }
                false => {
                    self.inner.soft = self.inner.soft ^ other.inner.soft;
                }
            }
        }
    }
}

impl PartialEq for Clmul {
    /// Compares two values through the representation of the backend selected
    /// by `self`'s token.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: `Clmul::new` writes `intrinsics` only on the hardware path
        // and `soft` otherwise, so the fields compared here are the
        // initialized ones (presuming both sides share the same detection).
        unsafe {
            match self.token.get_intr() {
                true => self.inner.intrinsics == other.inner.intrinsics,
                false => self.inner.soft == other.inner.soft,
            }
        }
    }
}

/// Checks that the auto-selected backend's `reduce_gcm` matches the software
/// backend's result on the same pseudo-random input.
#[test]
fn reduce_test() {
    use rand::Rng;
    use rand_chacha::{rand_core::SeedableRng, ChaCha12Rng};

    // Fixed seed keeps the test deterministic.
    let mut rng = ChaCha12Rng::from_seed([0; 32]);
    let upper: [u8; 16] = rng.gen();
    let lower: [u8; 16] = rng.gen();

    // Reference value computed with the software backend.
    let expected: [u8; 16] =
        soft::Clmul::reduce_gcm(soft::Clmul::new(&upper), soft::Clmul::new(&lower)).into();

    // Same computation through the dispatching wrapper.
    let actual: [u8; 16] = Clmul::reduce_gcm(Clmul::new(&upper), Clmul::new(&lower)).into();

    assert_eq!(expected, actual);
}
Loading

0 comments on commit 59e78e0

Please sign in to comment.