Skip to content

Commit

Permalink
fix incorrect results on big endian targets (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
nissaofthesea authored Apr 23, 2023
1 parent eaa31ae commit 3779fe8
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 21 deletions.
14 changes: 7 additions & 7 deletions src/hw_aarch64.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::hw_tables;
use crate::util;
use crate::util::{self, U64Le};

use std::arch::aarch64 as simd;
use std::arch::asm;
Expand Down Expand Up @@ -61,10 +61,10 @@ unsafe fn crc_u8(crc: u32, buffer: &[u8]) -> u32 {
}

#[inline(always)]
unsafe fn crc_u64(crc: u32, words: &[u64]) -> u32 {
unsafe fn crc_u64(crc: u32, words: &[U64Le]) -> u32 {
words
.iter()
.fold(crc, |crc, &next| crc_u64_append(crc, next))
.fold(crc, |crc, &next| crc_u64_append(crc, next.get()))
}

#[inline(always)]
Expand All @@ -77,7 +77,7 @@ unsafe fn crc_u64_parallel3(
crc: u32,
chunk_size: usize,
table: &hw_tables::CrcTable,
buffer: &[u64],
buffer: &[U64Le],
) -> u32 {
buffer.chunks(chunk_size).fold(crc, |mut crc0, chunk| {
let mut crc1 = 0;
Expand All @@ -92,9 +92,9 @@ unsafe fn crc_u64_parallel3(
let c = blocks.next().unwrap();

for i in 0..block_size {
crc0 = crc_u64_append(crc0, a[i]);
crc1 = crc_u64_append(crc1, b[i]);
crc2 = crc_u64_append(crc2, c[i]);
crc0 = crc_u64_append(crc0, a[i].get());
crc1 = crc_u64_append(crc1, b[i].get());
crc2 = crc_u64_append(crc2, c[i].get());
}

crc0 = table.shift_u32(crc0) ^ crc1;
Expand Down
14 changes: 7 additions & 7 deletions src/hw_x86_64.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Implements crc32c with SSE 4.2 support.
use crate::hw_tables;
use crate::util;
use crate::util::{self, U64Le};
use std::arch::x86_64 as simd;

/// Computes CRC-32C using the SSE 4.2 hardware instruction.
Expand Down Expand Up @@ -62,10 +62,10 @@ unsafe fn crc_u8(crc: u64, buffer: &[u8]) -> u64 {
}

#[inline]
unsafe fn crc_u64(crc: u64, buffer: &[u64]) -> u64 {
unsafe fn crc_u64(crc: u64, buffer: &[U64Le]) -> u64 {
buffer
.iter()
.fold(crc, |crc, &next| crc_u64_append(crc, next))
.fold(crc, |crc, &next| crc_u64_append(crc, next.get()))
}

/// Hardware-parallel version of the algorithm.
Expand All @@ -79,7 +79,7 @@ unsafe fn crc_u64_parallel3(
crc: u64,
chunk_size: usize,
table: &hw_tables::CrcTable,
buffer: &[u64],
buffer: &[U64Le],
) -> u64 {
buffer.chunks(chunk_size).fold(crc, |mut crc0, chunk| {
let mut crc1 = 0;
Expand All @@ -94,9 +94,9 @@ unsafe fn crc_u64_parallel3(
let c = blocks.next().unwrap();

for i in 0..block_size {
crc0 = crc_u64_append(crc0, a[i]);
crc1 = crc_u64_append(crc1, b[i]);
crc2 = crc_u64_append(crc2, c[i]);
crc0 = crc_u64_append(crc0, a[i].get());
crc1 = crc_u64_append(crc1, b[i].get());
crc2 = crc_u64_append(crc2, c[i].get());
}

crc0 = table.shift_u64(crc0) ^ crc1;
Expand Down
6 changes: 3 additions & 3 deletions src/sw.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Implements crc32c without hardware support.
use crate::util;
use crate::util::{self, U64Le};

/// 8-KiB lookup table.
pub struct CrcTable([[u32; 256]; 8]);
Expand Down Expand Up @@ -41,9 +41,9 @@ fn crc_u8(crc: u64, buffer: &[u8]) -> u64 {
}

#[inline]
fn crc_u64(crci: u64, buffer: &[u64]) -> u64 {
fn crc_u64(crci: u64, buffer: &[U64Le]) -> u64 {
buffer.iter().fold(crci, |crc, &next| {
let crc = crc ^ next;
let crc = crc ^ next.get();

// Note: I've tried refactoring this to a for-loop,
// but then it gets worse performance.
Expand Down
27 changes: 23 additions & 4 deletions src/util.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
use std::ptr::NonNull;
use std::{cmp, mem, slice};
use std::{cmp, slice};

/// A newtype wrapper for a little endian `u64`.
///
/// It is safe to transmute between a `u64` and `U64Le`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct U64Le(u64);

impl U64Le {
/// Returns a `u64` with correct endianness for the target.
///
/// On little endian targets, this is a no-op.
#[allow(clippy::inline_always)]
#[inline(always)]
pub const fn get(self) -> u64 {
u64::from_le(self.0)
}
}

/// Splits a buffer into three subslices:
/// - the first one is up to the first 8-byte aligned address.
/// - the second one is 8-byte aligned and its length is a multiple of 8.
/// - the third one is 8-byte aligned but its length is less than 8.
pub fn split(buffer: &[u8]) -> (&[u8], &[u64], &[u8]) {
pub(crate) fn split(buffer: &[u8]) -> (&[u8], &[U64Le], &[u8]) {
let (start, mid) = {
let split_index = {
let addr = buffer.as_ptr() as usize;
Expand Down Expand Up @@ -35,9 +53,10 @@ pub fn split(buffer: &[u8]) -> (&[u8], &[u64], &[u8]) {
let ptr = if length == 0 {
// `slice::from_raw_parts` requires that pointers be nonnull and
// aligned even for zero-length slices.
NonNull::<u64>::dangling().as_ptr()
NonNull::<U64Le>::dangling().as_ptr()
} else {
mem::transmute(mid.as_ptr())
#[allow(clippy::cast_ptr_alignment)]
mid.as_ptr().cast::<U64Le>()
};

slice::from_raw_parts(ptr, length)
Expand Down

0 comments on commit 3779fe8

Please sign in to comment.