From 4545df9f71add9e6b830e0eaafd8492a6adc7b8f Mon Sep 17 00:00:00 2001 From: lihuiba Date: Tue, 31 Dec 2024 12:20:18 +0800 Subject: [PATCH] crc --- CMakeLists.txt | 3 + common/checksum/crc32c.cpp | 363 +++------------ common/checksum/crc32c.h | 30 +- common/checksum/crc64_ecma_refl_pmull.S | 507 ++++++++++++++++++++ common/checksum/crc64ecma.cpp | 231 ++++++++++ common/checksum/crc64ecma.h | 43 ++ common/checksum/test/checksum.crc64 | 512 +++++++++++++++++++++ common/checksum/test/test_checksum.cpp | 109 +++-- include/photon/common/checksum/crc64ecma.h | 1 + 9 files changed, 1441 insertions(+), 358 deletions(-) create mode 100644 common/checksum/crc64_ecma_refl_pmull.S create mode 100644 common/checksum/crc64ecma.cpp create mode 100644 common/checksum/crc64ecma.h create mode 100644 common/checksum/test/checksum.crc64 create mode 120000 include/photon/common/checksum/crc64ecma.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 79e06415..81967a49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,6 +202,9 @@ file(GLOB PHOTON_SRC RELATIVE "${PROJECT_SOURCE_DIR}" rpc/*.cpp thread/*.cpp ) +if ((${ARCH} STREQUAL aarch64) OR (${ARCH} STREQUAL arm64)) + list(APPEND PHOTON_SRC common/checksum/crc64_ecma_refl_pmull.S) +endif () if (APPLE) list(APPEND PHOTON_SRC io/kqueue.cpp) else () diff --git a/common/checksum/crc32c.cpp b/common/checksum/crc32c.cpp index ae4ab9b8..a0c4f32e 100644 --- a/common/checksum/crc32c.cpp +++ b/common/checksum/crc32c.cpp @@ -15,21 +15,37 @@ */ #include "crc32c.h" +#include "crc64ecma.h" +#if defined(__linux__) && defined(__aarch64__) +#include +#include +#endif -static uint32_t (*crc32c_func)(const uint8_t*, size_t, uint32_t) = nullptr; +uint32_t (*crc32c_auto)(const uint8_t*, size_t, uint32_t) = nullptr; +uint64_t (*crc64ecma_auto)(const uint8_t *data, size_t nbytes, uint64_t crc); -__attribute__((constructor)) static void crc_init() { -#if ((defined(__x86_64__) || defined(__i386__)) && defined(__SSE4_2__)) +__attribute__((constructor)) 
+static void crc_init() { +#if defined(__x86_64__) __builtin_cpu_init(); - if (__builtin_cpu_supports("sse4.2")) { - crc32c_func = crc32c_hw; - } else { - crc32c_func = crc32c_sw; - } -#elif (defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)) - crc32c_func = crc32c_hw; + bool hw = __builtin_cpu_supports("sse4.2"); + crc32c_auto = hw ? crc32c_hw : crc32c_sw; + crc64ecma_auto = crc64ecma_sw; +#elif defined(__aarch64__) +#ifdef __APPLE__ // apple silicon has hw for both crc + crc32c_auto = crc32c_hw; + crc64ecma_auto = crc64ecma_hw; +#elif defined(__linux__) // linux on arm: runtime detection + long hwcaps= getauxval(AT_HWCAP); + crc32c_auto = (hwcaps & HWCAP_CRC32) ? crc32c_hw : crc32c_sw; + crc64ecma_auto = (hwcaps & HWCAP_PMULL) ? crc64ecma_hw : crc64ecma_sw; #else - crc32c_func = crc32c_sw; + crc32c_auto = crc32c_sw; + crc64ecma_auto = crc64ecma_sw; +#endif +#else // not __aarch64__, not __x86_64__ + crc32c_auto = crc32c_sw; + crc64ecma_auto = crc64ecma_sw; #endif } @@ -55,8 +71,8 @@ static inline uint32_t _crc32qi(uint32_t crc, uint8_t value) { #define _crc32qi __builtin_ia32_crc32qi #endif -uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t crc) { - uint32_t sum = crc; +template inline +uint32_t do_crc32c(const uint8_t *data, size_t nbytes, uint32_t crc, F1 f1, F8 f8) { size_t offset = 0; // Process bytes one at a time until we reach an 8-byte boundary and can @@ -66,104 +82,34 @@ uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t crc) { if (mask != 0) { size_t limit = std::min(nbytes, sizeof(uint64_t) - mask); while (offset < limit) { - sum = (uint32_t)_crc32qi(sum, data[offset]); + crc = f1(crc, data[offset]); offset++; } } // Process 8 bytes at a time until we have fewer than 8 bytes left. 
while (offset + sizeof(uint64_t) <= nbytes) { - const uint64_t *src = (const uint64_t *)(data + offset); - sum = _crc32di(sum, *src); + crc = f8(crc, *(uint64_t*)(data + offset)); offset += sizeof(uint64_t); } // Process any bytes remaining after the last aligned 8-byte block. while (offset < nbytes) { - sum = (uint32_t)_crc32qi(sum, data[offset]); + crc = f1(crc, data[offset]); offset++; } - return sum; + return crc; } -/* CRC32C routines, these use a different polynomial */ -/*****************************************************************/ -/* */ -/* CRC LOOKUP TABLE */ -/* ================ */ -/* The following CRC lookup table was generated automagically */ -/* by the Rocksoft^tm Model CRC Algorithm Table Generation */ -/* Program V1.0 using the following model parameters: */ -/* */ -/* Width : 4 bytes. */ -/* Poly : 0x1EDC6F41L */ -/* Reverse : TRUE. */ -/* */ -/* For more information on the Rocksoft^tm Model CRC Algorithm, */ -/* see the document titled "A Painless Guide to CRC Error */ -/* Detection Algorithms" by Ross Williams */ -/* (ross@guest.adelaide.edu.au.). This document is likely to be */ -/* in the FTP archive "ftp.adelaide.edu.au/pub/rocksoft". 
*/ -/* */ -/*****************************************************************/ - -static const uint32_t crc32Table[256] = { - 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL, - 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL, - 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, - 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, - 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L, - 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL, - 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, - 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, - 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL, - 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L, - 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, - 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, - 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL, - 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L, - 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L, - 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, - 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL, - 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL, - 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, - 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, - 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL, - 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L, - 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, - 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, - 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL, - 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL, - 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, - 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, 
- 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L, - 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL, - 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, - 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, - 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L, - 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL, - 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, - 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, - 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L, - 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL, - 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, - 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, - 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L, - 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L, - 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L, - 0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, - 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL, - 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L, - 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, - 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, - 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL, - 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L, - 0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, - 0xAD7D5351L}; - -static uint32_t singletable_crc32c(uint32_t crc, const uint8_t* buf, size_t size) { - while (size--) crc = crc32Table[(crc ^ *buf++) & 0xff] ^ (crc >> 8); - return crc; +uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t crc) { + return do_crc32c(data, nbytes, crc, + [](uint32_t crc, uint8_t b) { + return (uint32_t)_crc32qi(crc, b); + }, + [](uint32_t crc, uint64_t x) { + return (uint32_t)_crc32di(crc, x); + } + ); } /* @@ 
-237,26 +183,6 @@ static const uint32_t sctp_crc_tableil8_o32[256] = { 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}; -/* - * end of the CRC lookup table crc_tableil8_o32 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o40[256] = { 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, @@ -302,26 +228,6 @@ static const uint32_t sctp_crc_tableil8_o40[256] = { 0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483}; -/* - * end of the CRC lookup table crc_tableil8_o40 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 
8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o48[256] = { 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, @@ -367,26 +273,6 @@ static const uint32_t sctp_crc_tableil8_o48[256] = { 0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8}; -/* - * end of the CRC lookup table crc_tableil8_o48 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o56[256] = { 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, @@ -432,26 +318,6 @@ static const uint32_t sctp_crc_tableil8_o56[256] = { 0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842}; -/* - * end of the CRC lookup table crc_tableil8_o56 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... 
.\ - * File Name = ............................ 8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o64[256] = { 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, @@ -497,26 +363,6 @@ static const uint32_t sctp_crc_tableil8_o64[256] = { 0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3}; -/* - * end of the CRC lookup table crc_tableil8_o64 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o72[256] = { 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, @@ -562,26 +408,6 @@ static const uint32_t sctp_crc_tableil8_o72[256] = { 0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C}; -/* - * end of the CRC lookup table crc_tableil8_o72 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 
8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o80[256] = { 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, @@ -627,26 +453,6 @@ static const uint32_t sctp_crc_tableil8_o80[256] = { 0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F}; -/* - * end of the CRC lookup table crc_tableil8_o80 - */ - - - -/* - * The following CRC lookup table was generated automagically using the - * following model parameters: - * - * Generator Polynomial = ................. 0x1EDC6F41 - * Generator Polynomial Length = .......... 32 bits - * Reflected Bits = ....................... TRUE - * Table Generation Offset = .............. 32 bits - * Number of Slices = ..................... 8 slices - * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 - * Directory Name = ....................... .\ - * File Name = ............................ 
8x256_tables.c - */ - static const uint32_t sctp_crc_tableil8_o88[256] = { 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, @@ -692,75 +498,20 @@ static const uint32_t sctp_crc_tableil8_o88[256] = { 0x14124958, 0x5D2E347F, 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5}; -/* - * end of the CRC lookup table crc_tableil8_o88 - */ - -static uint32_t crc32c_sb8_64_bit(uint32_t crc, const unsigned char *p_buf, - uint32_t length, uint32_t init_bytes) { - uint32_t li; - uint32_t term1, term2; - uint32_t running_length; - uint32_t end_bytes; - - running_length = ((length - init_bytes) / 8) * 8; - end_bytes = length - init_bytes - running_length; - - for (li = 0; li < init_bytes; li++) { - crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8); - } - for (li = 0; li < running_length / 8; li++) { - crc ^= *(const uint32_t *)p_buf; - p_buf += 4; - - term1 = sctp_crc_tableil8_o88[crc & 0x000000FF] ^ - sctp_crc_tableil8_o80[(crc >> 8) & 0x000000FF]; - term2 = crc >> 16; - crc = term1 ^ sctp_crc_tableil8_o72[term2 & 0x000000FF] ^ - sctp_crc_tableil8_o64[(term2 >> 8) & 0x000000FF]; - - term1 = - sctp_crc_tableil8_o56[(*(const uint32_t *)p_buf) & 0x000000FF] ^ - sctp_crc_tableil8_o48[((*(const uint32_t *)p_buf) >> 8) & 0x000000FF]; - - term2 = (*(const uint32_t *)p_buf) >> 16; - crc = crc ^ term1 ^ sctp_crc_tableil8_o40[term2 & 0x000000FF] ^ - sctp_crc_tableil8_o32[(term2 >> 8) & 0x000000FF]; - p_buf += 4; - } - for (li = 0; li < end_bytes; li++) { - crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8); - } - return crc; -} - -static uint32_t multitable_crc32c(uint32_t crc32c, const unsigned char *buffer, - unsigned int length) { - uint32_t to_even_word; - - if (length == 0) { - return (crc32c); - } - to_even_word = (4 - (((uintptr_t)buffer) & 0x3)); - return (crc32c_sb8_64_bit(crc32c, buffer, length, 
to_even_word)); -} - uint32_t crc32c_sw(const uint8_t *buffer, size_t nbytes, uint32_t crc) { - if (nbytes < 4) { - return (singletable_crc32c(crc, buffer, nbytes)); - } else { - return (multitable_crc32c(crc, buffer, nbytes)); - } -} - -uint32_t crc32c_extend(const void *data, size_t nbytes, uint32_t crc) { - return crc32c_func(reinterpret_cast(data), nbytes, crc); -} - -uint32_t crc32c(const void *data, size_t nbytes) { - return crc32c_extend(reinterpret_cast(data), nbytes, 0); -} - -bool is_crc32c_hw_available() { - return crc32c_func == crc32c_hw; + auto f1 = [](uint32_t crc, uint8_t b) { + return sctp_crc_tableil8_o32[(crc ^ b) & 0xff] ^ (crc >> 8); + }; + auto f2 = [](uint32_t crc, uint64_t x) { + x ^= crc; + return sctp_crc_tableil8_o88[(x >> 0) & 0xff] ^ + sctp_crc_tableil8_o80[(x >> 8) & 0xff] ^ + sctp_crc_tableil8_o72[(x >> 16) & 0xff] ^ + sctp_crc_tableil8_o64[(x >> 24) & 0xff] ^ + sctp_crc_tableil8_o56[(x >> 32) & 0xff] ^ + sctp_crc_tableil8_o48[(x >> 40) & 0xff] ^ + sctp_crc_tableil8_o40[(x >> 48) & 0xff] ^ + sctp_crc_tableil8_o32[(x >> 56) & 0xff] ; + }; + return do_crc32c(buffer, nbytes, crc, f1, f2); } diff --git a/common/checksum/crc32c.h b/common/checksum/crc32c.h index 3a42d2ab..6ee1b45e 100644 --- a/common/checksum/crc32c.h +++ b/common/checksum/crc32c.h @@ -15,7 +15,11 @@ limitations under the License. */ #pragma once #include -#include +#include + +uint32_t crc32c_sw(const uint8_t *buffer, size_t nbytes, uint32_t crc); + +uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t crc); /** * @brief We recommand using of crc32c() and crc32c_extend(), which can exploit hardware @@ -23,20 +27,24 @@ limitations under the License. 
* is_crc32c_hw_available() to detect whether hardware acceleartion is available at first; * */ -uint32_t crc32c(const void *data, size_t nbytes); - -uint32_t crc32c_extend(const void *data, size_t nbytes, uint32_t crc); +inline uint32_t crc32c_extend(const void *data, size_t nbytes, uint32_t crc) { + extern uint32_t (*crc32c_auto)(const uint8_t *data, size_t nbytes, uint32_t crc); + return crc32c_auto((uint8_t*)data, nbytes, crc); +} -inline uint32_t crc32c(const std::string &text) { - return crc32c_extend(text.data(), text.size(), 0); +inline uint32_t crc32c(const void *data, size_t nbytes) { + return crc32c_extend((uint8_t*)data, nbytes, 0); } -inline uint32_t crc32c_extend(const std::string &text, uint32_t crc) { +inline uint32_t crc32c_extend(std::string_view text, uint32_t crc = 0) { return crc32c_extend(text.data(), text.size(), crc); } -uint32_t crc32c_sw(const uint8_t *buffer, size_t nbytes, uint32_t crc); - -uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t crc); +inline uint32_t crc32c(std::string_view text) { + return crc32c_extend(text); +} -bool is_crc32c_hw_available(); +inline bool is_crc32c_hw_available() { + extern uint32_t (*crc32c_auto)(const uint8_t *data, size_t nbytes, uint32_t crc); + return crc32c_auto == crc32c_hw; +} diff --git a/common/checksum/crc64_ecma_refl_pmull.S b/common/checksum/crc64_ecma_refl_pmull.S new file mode 100644 index 00000000..a1f7cb44 --- /dev/null +++ b/common/checksum/crc64_ecma_refl_pmull.S @@ -0,0 +1,507 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### + +#ifndef __AARCH64_LABEL_H__ +#define __AARCH64_LABEL_H__ + +#ifdef __USER_LABEL_PREFIX__ +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b +#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x) +#else +#define cdecl(x) x +#endif + +#ifdef __APPLE__ +#define ASM_DEF_RODATA .section __TEXT,__const +#else +#define ASM_DEF_RODATA .section .rodata +#endif + +#endif +; #include "aarch64_label.h" + +.equ p4_low_b0, 0x41f3 +.equ p4_low_b1, 0x9dd4 +.equ p4_low_b2, 0xefbb +.equ p4_low_b3, 0x6ae3 +.equ p4_high_b0, 0x2df4 +.equ p4_high_b1, 0xa784 +.equ p4_high_b2, 0x6054 +.equ p4_high_b3, 0x081f + +.equ p1_low_b0, 0x3ae4 +.equ p1_low_b1, 0xca39 +.equ p1_low_b2, 0xd497 +.equ p1_low_b3, 0xe05d +.equ p1_high_b0, 0x5f40 +.equ p1_high_b1, 0xc787 +.equ p1_high_b2, 0x95af +.equ p1_high_b3, 0xdabe + +.equ p0_low_b0, 0x5f40 +.equ p0_low_b1, 0xc787 +.equ p0_low_b2, 0x95af +.equ p0_low_b3, 0xdabe + +.equ br_low_b0, 0x63d5 +.equ br_low_b1, 0x1729 +.equ br_low_b2, 0x466c +.equ br_low_b3, 0x9c3e +.equ br_high_b0, 0x1e85 +.equ br_high_b1, 0xaf0e +.equ br_high_b2, 0xaf2b +.equ br_high_b3, 0x92d8 + + .text +ASM_DEF_RODATA + .align 4 + .set .lanchor_crc_tab,. 
+ 0 +#ifndef __APPLE__ + .type crc64_tab, %object + .size crc64_tab, 2048 +#endif +crc64_tab: + .xword 0x0000000000000000, 0xb32e4cbe03a75f6f + .xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 + .xword 0x7bd0c384ff8f5e33, 0xc8fe8f3afc28015c + .xword 0x8f54f5d357cffe68, 0x3c7ab96d5468a107 + .xword 0xf7a18709ff1ebc66, 0x448fcbb7fcb9e309 + .xword 0x0325b15e575e1c3d, 0xb00bfde054f94352 + .xword 0x8c71448d0091e255, 0x3f5f08330336bd3a + .xword 0x78f572daa8d1420e, 0xcbdb3e64ab761d61 + .xword 0x7d9ba13851336649, 0xceb5ed8652943926 + .xword 0x891f976ff973c612, 0x3a31dbd1fad4997d + .xword 0x064b62bcaebc387a, 0xb5652e02ad1b6715 + .xword 0xf2cf54eb06fc9821, 0x41e11855055bc74e + .xword 0x8a3a2631ae2dda2f, 0x39146a8fad8a8540 + .xword 0x7ebe1066066d7a74, 0xcd905cd805ca251b + .xword 0xf1eae5b551a2841c, 0x42c4a90b5205db73 + .xword 0x056ed3e2f9e22447, 0xb6409f5cfa457b28 + .xword 0xfb374270a266cc92, 0x48190ecea1c193fd + .xword 0x0fb374270a266cc9, 0xbc9d3899098133a6 + .xword 0x80e781f45de992a1, 0x33c9cd4a5e4ecdce + .xword 0x7463b7a3f5a932fa, 0xc74dfb1df60e6d95 + .xword 0x0c96c5795d7870f4, 0xbfb889c75edf2f9b + .xword 0xf812f32ef538d0af, 0x4b3cbf90f69f8fc0 + .xword 0x774606fda2f72ec7, 0xc4684a43a15071a8 + .xword 0x83c230aa0ab78e9c, 0x30ec7c140910d1f3 + .xword 0x86ace348f355aadb, 0x3582aff6f0f2f5b4 + .xword 0x7228d51f5b150a80, 0xc10699a158b255ef + .xword 0xfd7c20cc0cdaf4e8, 0x4e526c720f7dab87 + .xword 0x09f8169ba49a54b3, 0xbad65a25a73d0bdc + .xword 0x710d64410c4b16bd, 0xc22328ff0fec49d2 + .xword 0x85895216a40bb6e6, 0x36a71ea8a7ace989 + .xword 0x0adda7c5f3c4488e, 0xb9f3eb7bf06317e1 + .xword 0xfe5991925b84e8d5, 0x4d77dd2c5823b7ba + .xword 0x64b62bcaebc387a1, 0xd7986774e864d8ce + .xword 0x90321d9d438327fa, 0x231c512340247895 + .xword 0x1f66e84e144cd992, 0xac48a4f017eb86fd + .xword 0xebe2de19bc0c79c9, 0x58cc92a7bfab26a6 + .xword 0x9317acc314dd3bc7, 0x2039e07d177a64a8 + .xword 0x67939a94bc9d9b9c, 0xd4bdd62abf3ac4f3 + .xword 0xe8c76f47eb5265f4, 0x5be923f9e8f53a9b + .xword 0x1c4359104312c5af, 
0xaf6d15ae40b59ac0 + .xword 0x192d8af2baf0e1e8, 0xaa03c64cb957be87 + .xword 0xeda9bca512b041b3, 0x5e87f01b11171edc + .xword 0x62fd4976457fbfdb, 0xd1d305c846d8e0b4 + .xword 0x96797f21ed3f1f80, 0x2557339fee9840ef + .xword 0xee8c0dfb45ee5d8e, 0x5da24145464902e1 + .xword 0x1a083bacedaefdd5, 0xa9267712ee09a2ba + .xword 0x955cce7fba6103bd, 0x267282c1b9c65cd2 + .xword 0x61d8f8281221a3e6, 0xd2f6b4961186fc89 + .xword 0x9f8169ba49a54b33, 0x2caf25044a02145c + .xword 0x6b055fede1e5eb68, 0xd82b1353e242b407 + .xword 0xe451aa3eb62a1500, 0x577fe680b58d4a6f + .xword 0x10d59c691e6ab55b, 0xa3fbd0d71dcdea34 + .xword 0x6820eeb3b6bbf755, 0xdb0ea20db51ca83a + .xword 0x9ca4d8e41efb570e, 0x2f8a945a1d5c0861 + .xword 0x13f02d374934a966, 0xa0de61894a93f609 + .xword 0xe7741b60e174093d, 0x545a57dee2d35652 + .xword 0xe21ac88218962d7a, 0x5134843c1b317215 + .xword 0x169efed5b0d68d21, 0xa5b0b26bb371d24e + .xword 0x99ca0b06e7197349, 0x2ae447b8e4be2c26 + .xword 0x6d4e3d514f59d312, 0xde6071ef4cfe8c7d + .xword 0x15bb4f8be788911c, 0xa6950335e42fce73 + .xword 0xe13f79dc4fc83147, 0x521135624c6f6e28 + .xword 0x6e6b8c0f1807cf2f, 0xdd45c0b11ba09040 + .xword 0x9aefba58b0476f74, 0x29c1f6e6b3e0301b + .xword 0xc96c5795d7870f42, 0x7a421b2bd420502d + .xword 0x3de861c27fc7af19, 0x8ec62d7c7c60f076 + .xword 0xb2bc941128085171, 0x0192d8af2baf0e1e + .xword 0x4638a2468048f12a, 0xf516eef883efae45 + .xword 0x3ecdd09c2899b324, 0x8de39c222b3eec4b + .xword 0xca49e6cb80d9137f, 0x7967aa75837e4c10 + .xword 0x451d1318d716ed17, 0xf6335fa6d4b1b278 + .xword 0xb199254f7f564d4c, 0x02b769f17cf11223 + .xword 0xb4f7f6ad86b4690b, 0x07d9ba1385133664 + .xword 0x4073c0fa2ef4c950, 0xf35d8c442d53963f + .xword 0xcf273529793b3738, 0x7c0979977a9c6857 + .xword 0x3ba3037ed17b9763, 0x888d4fc0d2dcc80c + .xword 0x435671a479aad56d, 0xf0783d1a7a0d8a02 + .xword 0xb7d247f3d1ea7536, 0x04fc0b4dd24d2a59 + .xword 0x3886b22086258b5e, 0x8ba8fe9e8582d431 + .xword 0xcc0284772e652b05, 0x7f2cc8c92dc2746a + .xword 0x325b15e575e1c3d0, 0x8175595b76469cbf + .xword 
0xc6df23b2dda1638b, 0x75f16f0cde063ce4 + .xword 0x498bd6618a6e9de3, 0xfaa59adf89c9c28c + .xword 0xbd0fe036222e3db8, 0x0e21ac88218962d7 + .xword 0xc5fa92ec8aff7fb6, 0x76d4de52895820d9 + .xword 0x317ea4bb22bfdfed, 0x8250e80521188082 + .xword 0xbe2a516875702185, 0x0d041dd676d77eea + .xword 0x4aae673fdd3081de, 0xf9802b81de97deb1 + .xword 0x4fc0b4dd24d2a599, 0xfceef8632775faf6 + .xword 0xbb44828a8c9205c2, 0x086ace348f355aad + .xword 0x34107759db5dfbaa, 0x873e3be7d8faa4c5 + .xword 0xc094410e731d5bf1, 0x73ba0db070ba049e + .xword 0xb86133d4dbcc19ff, 0x0b4f7f6ad86b4690 + .xword 0x4ce50583738cb9a4, 0xffcb493d702be6cb + .xword 0xc3b1f050244347cc, 0x709fbcee27e418a3 + .xword 0x3735c6078c03e797, 0x841b8ab98fa4b8f8 + .xword 0xadda7c5f3c4488e3, 0x1ef430e13fe3d78c + .xword 0x595e4a08940428b8, 0xea7006b697a377d7 + .xword 0xd60abfdbc3cbd6d0, 0x6524f365c06c89bf + .xword 0x228e898c6b8b768b, 0x91a0c532682c29e4 + .xword 0x5a7bfb56c35a3485, 0xe955b7e8c0fd6bea + .xword 0xaeffcd016b1a94de, 0x1dd181bf68bdcbb1 + .xword 0x21ab38d23cd56ab6, 0x9285746c3f7235d9 + .xword 0xd52f0e859495caed, 0x6601423b97329582 + .xword 0xd041dd676d77eeaa, 0x636f91d96ed0b1c5 + .xword 0x24c5eb30c5374ef1, 0x97eba78ec690119e + .xword 0xab911ee392f8b099, 0x18bf525d915feff6 + .xword 0x5f1528b43ab810c2, 0xec3b640a391f4fad + .xword 0x27e05a6e926952cc, 0x94ce16d091ce0da3 + .xword 0xd3646c393a29f297, 0x604a2087398eadf8 + .xword 0x5c3099ea6de60cff, 0xef1ed5546e415390 + .xword 0xa8b4afbdc5a6aca4, 0x1b9ae303c601f3cb + .xword 0x56ed3e2f9e224471, 0xe5c372919d851b1e + .xword 0xa26908783662e42a, 0x114744c635c5bb45 + .xword 0x2d3dfdab61ad1a42, 0x9e13b115620a452d + .xword 0xd9b9cbfcc9edba19, 0x6a978742ca4ae576 + .xword 0xa14cb926613cf817, 0x1262f598629ba778 + .xword 0x55c88f71c97c584c, 0xe6e6c3cfcadb0723 + .xword 0xda9c7aa29eb3a624, 0x69b2361c9d14f94b + .xword 0x2e184cf536f3067f, 0x9d36004b35545910 + .xword 0x2b769f17cf112238, 0x9858d3a9ccb67d57 + .xword 0xdff2a94067518263, 0x6cdce5fe64f6dd0c + .xword 0x50a65c93309e7c0b, 
0xe388102d33392364 + .xword 0xa4226ac498dedc50, 0x170c267a9b79833f + .xword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131 + .xword 0x28532e49984f3e05, 0x9b7d62f79be8616a + .xword 0xa707db9acf80c06d, 0x14299724cc279f02 + .xword 0x5383edcd67c06036, 0xe0ada17364673f59 +; #include "crc64_ecma_refl_pmull.h" + +// parameters +#define w_seed w0 +#define x_seed x0 +#define x_buf x1 +#define w_len w2 +#define x_len x2 + +// return +#define w_crc_ret w0 +#define x_crc_ret x0 + +// constant +#define FOLD_SIZE 64 + +// global variables +#define x_buf_end x3 +#define w_counter w4 +#define x_counter x4 +#define x_buf_iter x5 +#define x_crc_tab_addr x6 +#define x_tmp2 x6 +#define w_tmp w7 +#define x_tmp x7 + +#define v_x0 v0 +#define d_x0 d0 +#define s_x0 s0 + +#define q_x1 q1 +#define v_x1 v1 + +#define q_x2 q2 +#define v_x2 v2 + +#define q_x3 q3 +#define v_x3 v3 +#define d_x3 d3 +#define s_x3 s3 + +#define q_y0 q4 +#define v_y0 v4 +#define v_tmp_high v4 +#define d_tmp_high d4 + +#define q_y1 q5 +#define v_y1 v5 +#define v_tmp_low v5 + +#define q_y2 q6 +#define v_y2 v6 + +#define q_y3 q7 +#define v_y3 v7 + +#define q_x0_tmp q30 +#define v_x0_tmp v30 +#define d_p4_high v30.d[1] +#define d_p4_low d30 +#define v_p4 v30 +#define d_p1_high v30.d[1] +#define d_p1_low d30 +#define v_p1 v30 +#define d_p0_high v30.d[1] +#define d_p0_low d30 +#define v_p0 v30 +#define d_br_low d30 +#define d_br_low2 v30.d[1] +#define v_br_low v30 + +#define q_shuffle q31 +#define v_shuffle v31 +#define d_br_high d31 +#define d_br_high2 v31.d[1] +#define v_br_high v31 +#define d_p0_low2 d31 +#define d_p0_high2 v31.d[1] +#define v_p02 v31 + +#define v_x0_high v16 +#define v_x1_high v17 +#define v_x2_high v18 +#define v_x3_high v19 + +.macro crc_refl_load_first_block + ldr q_x0_tmp, [x_buf] + ldr q_x1, [x_buf, 16] + ldr q_x2, [x_buf, 32] + ldr q_x3, [x_buf, 48] + + and x_counter, x_len, -64 + sub x_tmp, x_counter, #64 + cmp x_tmp, 63 + + add x_buf_iter, x_buf, 64 + + eor v_x0.16b, v_x0.16b, v_x0_tmp.16b +.endm + 
+.macro crc64_load_p4 + add x_buf_end, x_buf_iter, x_tmp + + mov x_tmp, p4_low_b0 + movk x_tmp, p4_low_b1, lsl 16 + movk x_tmp, p4_low_b2, lsl 32 + movk x_tmp, p4_low_b3, lsl 48 + fmov d_p4_low, x_tmp + + mov x_tmp2, p4_high_b0 + movk x_tmp2, p4_high_b1, lsl 16 + movk x_tmp2, p4_high_b2, lsl 32 + movk x_tmp2, p4_high_b3, lsl 48 + fmov d_p4_high, x_tmp2 +.endm + +.macro crc_refl_loop + .align 3 +.clmul_loop: + // interleave ldr and pmull(2) for arch which can only issue quadword load every + // other cycle (i.e. A55) + ldr q_y0, [x_buf_iter] + pmull2 v_x0_high.1q, v_x0.2d, v_p4.2d + ldr q_y1, [x_buf_iter, 16] + pmull2 v_x1_high.1q, v_x1.2d, v_p4.2d + ldr q_y2, [x_buf_iter, 32] + pmull2 v_x2_high.1q, v_x2.2d, v_p4.2d + ldr q_y3, [x_buf_iter, 48] + pmull2 v_x3_high.1q, v_x3.2d, v_p4.2d + + pmull v_x0.1q, v_x0.1d, v_p4.1d + add x_buf_iter, x_buf_iter, 64 + pmull v_x1.1q, v_x1.1d, v_p4.1d + cmp x_buf_iter, x_buf_end + pmull v_x2.1q, v_x2.1d, v_p4.1d + pmull v_x3.1q, v_x3.1d, v_p4.1d + + eor v_x0.16b, v_x0_high.16b, v_x0.16b + eor v_x0.16b, v_x0.16b, v_y0.16b + + eor v_x1.16b, v_x1_high.16b, v_x1.16b + eor v_x1.16b, v_x1.16b, v_y1.16b + + eor v_x2.16b, v_x2_high.16b, v_x2.16b + eor v_x2.16b, v_x2.16b, v_y2.16b + + eor v_x3.16b, v_x3_high.16b, v_x3.16b + eor v_x3.16b, v_x3.16b, v_y3.16b + bne .clmul_loop +.endm + +.macro crc64_fold_512b_to_128b + mov x_tmp, p1_low_b0 + movk x_tmp, p1_low_b1, lsl 16 + movk x_tmp, p1_low_b2, lsl 32 + movk x_tmp, p1_low_b3, lsl 48 + fmov d_p1_low, x_tmp + + mov x_tmp2, p1_high_b0 + movk x_tmp2, p1_high_b1, lsl 16 + movk x_tmp2, p1_high_b2, lsl 32 + movk x_tmp2, p1_high_b3, lsl 48 + fmov d_p1_high, x_tmp2 + + pmull2 v_tmp_high.1q, v_x0.2d, v_p1.2d + pmull v_tmp_low.1q, v_x0.1d, v_p1.1d + eor v_x1.16b, v_x1.16b, v_tmp_high.16b + eor v_x1.16b, v_x1.16b, v_tmp_low.16b + + pmull2 v_tmp_high.1q, v_x1.2d, v_p1.2d + pmull v_tmp_low.1q, v_x1.1d, v_p1.1d + eor v_x2.16b, v_x2.16b, v_tmp_high.16b + eor v_x2.16b, v_x2.16b, v_tmp_low.16b + + pmull2 
v_tmp_high.1q, v_x2.2d, v_p1.2d
+	pmull v_tmp_low.1q, v_x2.1d, v_p1.1d
+	eor v_x3.16b, v_x3.16b, v_tmp_high.16b
+	eor v_x3.16b, v_x3.16b, v_tmp_low.16b
+.endm
+; #include "crc_common_pmull.h"
+/* crc64_refl_func NAME: emits the global function NAME.
+   Inputs are bound by the register aliases: seed in x0, buffer pointer in
+   x1, length in x2; the result is returned in x0. The seed is inverted on
+   entry and the running value is inverted again before returning.
+   Buffers longer than FOLD_SIZE-1 bytes take the pmull path first
+   (.crc_clmul_pre) for the leading multiple of 64 bytes; whatever remains
+   (or the whole buffer when it is shorter) is handled one byte at a time
+   with the lookup table at .lanchor_crc_tab. */
+.macro crc64_refl_func name:req
+	.arch armv8-a+crypto
+	.text
+	.align 3
+	.global cdecl(\name)
+#ifndef __APPLE__
+	.type \name, %function
+#endif
+
+/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
+
+cdecl(\name\()):
+	mvn x_seed, x_seed
+	mov x_counter, 0	// number of bytes already consumed
+	cmp x_len, (FOLD_SIZE-1)
+	bhi .crc_clmul_pre	// 64 bytes or more: use the pmull path first
+
+.crc_tab_pre:
+	cmp x_len, x_counter
+	bls .done	// nothing left for the table loop
+/* Set up the table loop: x_buf_iter = first unprocessed byte, x_buf = one
+   past the last byte, x_crc_tab_addr = address of the lookup table. */
+#ifndef __APPLE__
+	adrp x_tmp, .lanchor_crc_tab
+	add x_buf_iter, x_buf, x_counter
+	add x_buf, x_buf, x_len
+	add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
+#else
+	adrp x_tmp, .lanchor_crc_tab@PAGE
+	add x_buf_iter, x_buf, x_counter
+	add x_buf, x_buf, x_len
+	add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
+#endif
+
+	.align 3
+.loop_crc_tab:
+	ldrb w_tmp, [x_buf_iter], 1	// next byte, post-increment
+	eor w_tmp, w_tmp, w0	// w0 is the low half of x_seed
+	cmp x_buf, x_buf_iter	// flags consumed by the bne below
+	and x_tmp, x_tmp, 255	// table index = (byte ^ crc) & 0xff
+	ldr x_tmp, [x_crc_tab_addr, x_tmp, lsl 3]	// 8-byte table entries
+	eor x_seed, x_tmp, x_seed, lsr 8
+	bne .loop_crc_tab
+
+.done:
+	mvn x_crc_ret, x_seed
+	ret
+
+	.align 2
+.crc_clmul_pre:
+	fmov d_x0, x_seed // save crc to d_x0
+
+	crc_refl_load_first_block
+
+	bls .clmul_loop_end	// uses the flags left by the macro: no further 64-byte blocks
+
+	crc64_load_p4
+
+// 1024bit --> 512bit loop
+// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
+	crc_refl_loop
+
+.clmul_loop_end:
+// folding 512bit --> 128bit
+	crc64_fold_512b_to_128b
+
+// folding 128bit --> 64bit
+	mov x_tmp, p0_low_b0
+	movk x_tmp, p0_low_b1, lsl 16
+	movk x_tmp, p0_low_b2, lsl 32
+	movk x_tmp, p0_low_b3, lsl 48
+	fmov d_p0_low, x_tmp
+
+	pmull v_tmp_low.1q, v_x3.1d, v_p0.1d
+
+	mov d_tmp_high, v_x3.d[1]
+
+	eor v_x3.16b, v_tmp_high.16b, v_tmp_low.16b
+
+// barrett reduction
+	mov x_tmp, br_low_b0
+	movk x_tmp, br_low_b1, lsl 16
+	movk x_tmp, br_low_b2, lsl 32
+	movk x_tmp, br_low_b3, lsl 48
+	fmov d_br_low, x_tmp
+
+	mov x_tmp2,
br_high_b0
+	movk x_tmp2, br_high_b1, lsl 16
+	movk x_tmp2, br_high_b2, lsl 32
+	movk x_tmp2, br_high_b3, lsl 48
+	fmov d_br_high, x_tmp2
+
+	pmull v_tmp_low.1q, v_x3.1d, v_br_low.1d
+	pmull v_tmp_high.1q, v_tmp_low.1d, v_br_high.1d
+
+	ext v_tmp_low.16b, v_br_low.16b, v_tmp_low.16b, #8
+
+	eor v_tmp_low.16b, v_tmp_high.16b, v_tmp_low.16b
+	eor v_tmp_low.16b, v_x3.16b, v_tmp_low.16b
+	umov x_crc_ret, v_tmp_low.d[1]	// x_crc_ret is x0, i.e. also x_seed for the table loop
+/* Continue with the byte-wise table loop for the bytes past x_counter;
+   .crc_tab_pre returns immediately when x_len <= x_counter. */
+	b .crc_tab_pre
+#ifndef __APPLE__
+	.size \name, .-\name
+#endif
+.endm
+; #include "crc64_refl_common_pmull.h"
+/* Instantiate the function; crc64ecma.cpp declares it as
+   extern "C" uint64_t crc64_ecma_refl_pmull__(uint64_t seed, const uint8_t *buf, uint64_t len). */
+crc64_refl_func crc64_ecma_refl_pmull__
diff --git a/common/checksum/crc64ecma.cpp b/common/checksum/crc64ecma.cpp
new file mode 100644
index 00000000..eacaaffa
--- /dev/null
+++ b/common/checksum/crc64ecma.cpp
@@ -0,0 +1,231 @@
+/* crc64.c -- compute CRC-64
+ * Copyright (C) 2013 Mark Adler
+ * Version 1.4 16 Dec 2013 Mark Adler
+ */
+
+/*
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler
+ madler@alumni.caltech.edu
+ */
+
+/* Compute CRC-64 in the manner of xz, using the ECMA-182 polynomial,
+ bit-reversed, with one's complement pre and post processing. Provide a
+ means to combine separately computed CRC-64's.
*/
+
+/* Version history:
+   1.0 13 Dec 2013 First version
+   1.1 13 Dec 2013 Fix comments in test code
+   1.2 14 Dec 2013 Determine endianess at run time
+   1.3 15 Dec 2013 Add eight-byte processing for big endian as well
+                   Make use of the pthread library optional
+   1.4 16 Dec 2013 Make once variable volatile for limited thread protection
+ */
+/* NOTE(review): the header names after the #include directives below, and the
+   template arguments of std::tuple / std::unique_ptr further down, are absent
+   in this copy of the patch -- confirm them against the original file. */
+#include
+#include
+#include
+
+/* The include of pthread.h below can be commented out in order to not use the
+   pthread library for table initialization. In that case, the initialization
+   will not be thread-safe. That's fine, so long as it can be assured that
+   there is only one thread using crc64(). */
+#include /* link with -lpthread */
+
+/* 64-bit CRC polynomial with these coefficients, but reversed:
+   64, 62, 57, 55, 54, 53, 52, 47, 46, 45, 40, 39, 38, 37, 35, 33, 32,
+   31, 29, 27, 24, 23, 22, 21, 19, 17, 13, 12, 10, 9, 7, 4, 1, 0 */
+#define POLY UINT64_C(0xc96c5795d7870f42)
+
+/* Tables for CRC calculation -- filled in by initialization functions that are
+   called once. These could be replaced by constant tables generated in the
+   same way. There are two tables, one for each endianess. Since these are
+   static, i.e. local, one should be compiled out of existence if the compiler
+   can evaluate the endianess check in crc64() at compile time.
+   NOTE: in this version both table macros below expand to the same
+   expression, element 0 of the heap-allocated *crc64_table, so the little-
+   and big-endian code paths share a single table. The macros dereference
+   crc64_table and are therefore only valid after crc64_init() has run. */
+#include
+using Table = std::tuple;
+static std::unique_ptr crc64_table;
+#define crc64_little_table std::get<0>(*crc64_table)
+#define crc64_big_table std::get<0>(*crc64_table)
+
+/* Fill in the CRC-64 constants table. The (unnamed) parameter is ignored:
+   the function allocates a fresh Table, stores it in crc64_table and fills
+   that one. */
+static void crc64_init(uint64_t [][256])
+{
+    unsigned n, k;
+    uint64_t crc;
+
+    crc64_table.reset(new Table);
+    auto& table = std::get<0>(*crc64_table);
+    /* generate CRC-64's for all single byte sequences */
+    for (n = 0; n < 256; n++) {
+        crc = n;
+        for (k = 0; k < 8; k++)
+            crc = crc & 1 ?
POLY ^ (crc >> 1) : crc >> 1; + table[0][n] = crc; + } + + /* generate CRC-64's for those followed by 1 to 7 zeros */ + for (n = 0; n < 256; n++) { + crc = table[0][n]; + for (k = 1; k < 8; k++) { + crc = table[0][crc & 0xff] ^ (crc >> 8); + table[k][n] = crc; + } + } +} + +/* This function is called once to initialize the CRC-64 table for use on a + little-endian architecture. */ +static void crc64_little_init(void) +{ + crc64_init(crc64_little_table); +} + +/* Reverse the bytes in a 64-bit word. */ +static inline uint64_t rev8(uint64_t a) +{ + uint64_t m; + + m = UINT64_C(0xff00ff00ff00ff); + a = ((a >> 8) & m) | (a & m) << 8; + m = UINT64_C(0xffff0000ffff); + a = ((a >> 16) & m) | (a & m) << 16; + return a >> 32 | a << 32; +} + +/* This function is called once to initialize the CRC-64 table for use on a + big-endian architecture. */ +static void crc64_big_init(void) +{ + unsigned k, n; + + crc64_init(crc64_big_table); + for (k = 0; k < 8; k++) + for (n = 0; n < 256; n++) + crc64_big_table[k][n] = rev8(crc64_big_table[k][n]); +} + +/* Run the init() function exactly once. If pthread.h is not included, then + this macro will use a simple static state variable for the purpose, which is + not thread-safe. The init function must be of the type void init(void). */ +#ifdef PTHREAD_ONCE_INIT +# define ONCE(init) \ + do { \ + static pthread_once_t once = PTHREAD_ONCE_INIT; \ + pthread_once(&once, init); \ + } while (0) +#else +# define ONCE(init) \ + do { \ + static volatile int once = 1; \ + if (once) { \ + if (once++ == 1) { \ + init(); \ + once = 0; \ + } \ + else \ + while (once) \ + ; \ + } \ + } while (0) +#endif + +/* Calculate a CRC-64 eight bytes at a time on a little-endian architecture. 
*/
+static inline uint64_t crc64_little(uint64_t crc, const void *buf, size_t len)
+{
+    auto cursor = (unsigned char *)buf;
+
+    ONCE(crc64_little_init);
+    /* bind the table once; only valid after ONCE() has built it */
+    auto& tab = crc64_little_table;
+    crc = ~crc;
+    /* head: single bytes until the cursor is 8-byte aligned */
+    for (; len && ((uintptr_t)cursor & 7) != 0; len--)
+        crc = tab[0][(crc ^ *cursor++) & 0xff] ^ (crc >> 8);
+    /* body: one aligned 64-bit word per step, eight table lookups each */
+    for (; len >= 8; len -= 8, cursor += 8) {
+        crc ^= *(uint64_t *)cursor;
+        crc = tab[7][crc & 0xff] ^
+              tab[6][(crc >> 8) & 0xff] ^
+              tab[5][(crc >> 16) & 0xff] ^
+              tab[4][(crc >> 24) & 0xff] ^
+              tab[3][(crc >> 32) & 0xff] ^
+              tab[2][(crc >> 40) & 0xff] ^
+              tab[1][(crc >> 48) & 0xff] ^
+              tab[0][crc >> 56];
+    }
+    /* tail: the remaining (at most seven) bytes */
+    for (; len; len--)
+        crc = tab[0][(crc ^ *cursor++) & 0xff] ^ (crc >> 8);
+    return ~crc;
+}
+
+/* Big-endian counterpart of crc64_little(): the running value is kept
+   byte-reversed (rev8 on entry and exit), bytes are fed in through the top
+   byte, and the word step indexes the tables in the opposite order. */
+static inline uint64_t crc64_big(uint64_t crc, const void *buf, size_t len)
+{
+    auto cursor = (unsigned char *)buf;
+
+    ONCE(crc64_big_init);
+    /* bind the table once; only valid after ONCE() has built it */
+    auto& tab = crc64_big_table;
+    crc = ~rev8(crc);
+    /* head: single bytes until the cursor is 8-byte aligned */
+    for (; len && ((uintptr_t)cursor & 7) != 0; len--)
+        crc = tab[0][(crc >> 56) ^ *cursor++] ^ (crc << 8);
+    /* body: one aligned 64-bit word per step */
+    for (; len >= 8; len -= 8, cursor += 8) {
+        crc ^= *(uint64_t *)cursor;
+        crc = tab[0][crc & 0xff] ^
+              tab[1][(crc >> 8) & 0xff] ^
+              tab[2][(crc >> 16) & 0xff] ^
+              tab[3][(crc >> 24) & 0xff] ^
+              tab[4][(crc >> 32) & 0xff] ^
+              tab[5][(crc >> 40) & 0xff] ^
+              tab[6][(crc >> 48) & 0xff] ^
+              tab[7][crc >> 56];
+    }
+    /* tail: the remaining (at most seven) bytes */
+    for (; len; len--)
+        crc = tab[0][(crc >> 56) ^ *cursor++] ^ (crc << 8);
+    return ~rev8(crc);
+}
+
+/* Return the CRC-64 of buf[0..len-1] with initial crc, processing eight bytes
+   at a time. This selects one of two routines depending on the endianess of
+   the architecture.
A good optimizing compiler will determine the endianess + at compile time if it can, and get rid of the unused code and table. If the + endianess can be changed at run time, then this code will handle that as + well, initializing and using two tables, if called upon to do so. */ +uint64_t crc64ecma_sw(const uint8_t *buf, size_t len, uint64_t crc) { + uint64_t n = 1; + return *(char *)&n ? crc64_little(crc, buf, len) : + crc64_big(crc, buf, len); +} + +extern "C" uint64_t crc64_ecma_refl_pmull__(uint64_t seed, const uint8_t *buf, uint64_t len); + +uint64_t crc64ecma_hw(const uint8_t *buf, size_t len, uint64_t crc) { +#ifdef __aarch64__ + return crc64_ecma_refl_pmull__(crc, buf, len); +#else + // todo: use Intel ISA-L + return crc64ecma_sw(buf, len, crc); +#endif +} diff --git a/common/checksum/crc64ecma.h b/common/checksum/crc64ecma.h new file mode 100644 index 00000000..fa1bb305 --- /dev/null +++ b/common/checksum/crc64ecma.h @@ -0,0 +1,43 @@ +/* +Copyright 2022 The Photon Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once +#include +#include + +uint64_t crc64ecma_sw(const uint8_t *buffer, size_t nbytes, uint64_t crc); +uint64_t crc64ecma_hw(const uint8_t *buffer, size_t nbytes, uint64_t crc); + +inline uint64_t crc64ecma_extend(const void *data, size_t nbytes, uint64_t crc) { + extern uint64_t (*crc64ecma_auto)(const uint8_t *data, size_t nbytes, uint64_t crc); + return crc64ecma_auto((uint8_t*)data, nbytes, crc); +} + +inline uint64_t crc64ecma_extend(std::string_view text, uint64_t crc = 0) { + return crc64ecma_extend(text.data(), text.size(), crc); +} + +inline uint32_t crc64ecma(std::string_view text) { + return crc64ecma_extend(text); +} + +inline uint64_t crc64ecma(const void *buffer, size_t nbytes, uint64_t crc = 0) { + return crc64ecma_extend(buffer, nbytes, crc); +} + +inline bool is_crc64ecma_hw_available() { + extern uint64_t (*crc64ecma_auto)(const uint8_t *data, size_t nbytes, uint64_t crc); + return crc64ecma_auto == crc64ecma_hw; +} diff --git a/common/checksum/test/checksum.crc64 b/common/checksum/test/checksum.crc64 new file mode 100644 index 00000000..9b20e329 --- /dev/null +++ b/common/checksum/test/checksum.crc64 @@ -0,0 +1,512 @@ + 3675645893302102789 + 13575383233297952838 + 3231342946509354535 + 4367691578187800762 + 291572174157288946 + 15028124401329963252 + 17014779337585528422 + 7472864907675438169 + 11053793676921073550 + 3605476949374759924 + 6968019153079245888 + 7533137815332682114 + 9435932015950401840 + 9148968571860805934 + 14432684054188465790 + 7462632362787677257 + 7982443057201898176 + 11855695972569282553 + 15366493431182141777 + 3220404658222730942 + 166793581594187238 + 12346626414356130503 + 5820424956931456042 + 17461823117744195926 + 3437242385298896483 + 2780542265819075119 + 272588841074876900 + 6455570711871082130 + 9776439656859975616 + 6242512412415265925 + 5266465564011430308 + 5096664403121515872 + 2563008760762659315 + 1977708729002323441 + 6692528182942807385 + 16066548174710698499 + 8598418529517870334 + 
6893095892081048213 + 14061663413440516349 + 17019956380734428150 + 17238001880848458952 + 12043657883101059650 + 16000193783518722284 + 15236004228351008676 + 17529615381412136683 + 6949665614061830261 + 2291720192163502169 + 3413183933714370674 + 15527409143567426453 + 8559349612884769585 + 10145701057055578917 + 14832562630011312523 + 5379824476256332236 + 6145564891850378094 + 2353095822024071400 + 6525739549449249046 + 13422841307811499766 + 12384916970906049820 + 5555036215006715564 + 14549452389470506583 + 16230369325893357186 + 13690984167305151040 + 6066101059331622939 + 13950462258569324076 + 14480304220909953841 + 14813431790713122483 + 4850565333566413960 + 14067531948433845723 + 3319219039967132441 + 3461338562224177923 + 18268676878873766368 + 15021496225322344904 + 8062123443099406939 + 15828474123762140387 + 15010847560398489189 + 6753527275098338156 + 12291790305805041087 + 8801097039422467995 + 4000512575340316147 + 1748416779352845752 + 9223304329144121868 + 8643164160622673206 + 4710100935417359544 + 13211401236549884561 + 13490695293695448403 + 3004331170400441989 + 14202218345514910571 + 12413879521331287029 + 15205106657650763471 + 7678581411100242139 + 16092328695417661592 + 13497456137996522209 + 12578506088388608199 + 7834331611433682439 + 8254857329234604557 + 3899792581377884592 + 17720460694149853967 + 5569122774645687170 + 643115854471100197 + 4693522282388589170 + 15497463510892026127 + 13914647926147645245 + 2907108223172916 + 283430118176918967 + 4664435318249208470 + 5713738099521438952 + 7387381409011786765 + 14178886605126496135 + 13619004627836256412 + 3982184096955930012 + 9576763920211647142 + 14557402521382095790 + 10288299666603789750 + 4833488278617031659 + 15457168767848598424 + 5754895559604413337 + 5711726096055333617 + 6975563015898070977 + 6152770729218755192 + 16742489449855320693 + 7247764814923185000 + 7023121927399233302 + 5238584495820150369 + 12285886033849180890 + 16611257634685608586 + 13280268554304491862 + 
6272638245688029365 + 10196107758201456944 + 4546734394345678904 + 10349454260502732491 + 3340726913691323306 + 5041786933067064151 + 16326808388500938898 + 13505556197084861545 + 10621608474111610008 + 5529623431127887075 + 12492290790569894852 + 11613285437483635647 + 4618902571955208776 + 1179605513722975442 + 1509410478697805085 + 5660417593586828729 + 13002510355655922309 + 18014135240494484968 + 16880248534872429585 + 3740458051569469969 + 9232133013863102496 + 17475095711723620754 + 18058405225694959950 + 11159479187753079462 + 9861995408041397717 + 7697798172238796016 + 16920476357524218056 + 5802822380459211424 + 10030414017252607936 + 6035834778378040890 + 13175304857422050837 + 13941746459697458578 + 9742422692011676067 + 10271110174071200044 + 4547170379373949416 + 12541292372195717412 + 4332458232275946877 + 12318629803273969153 + 8586037168367691602 + 7987101543150449283 + 13731422176117240764 + 14939271039062684224 + 15783632091755434563 + 15789305322237708067 + 17848276411596632938 + 7053763765548643934 + 3395777236412981980 + 6137533786125991525 + 17093370046287534086 + 711120688704321312 + 14877494942260099354 + 906344198000509766 + 3036443741128475758 + 17098151302748294332 + 8819203384936990506 + 502801836050663734 + 4250387197919840639 + 6724987020658279779 + 2301606073182866524 + 7980156167489379240 + 10179940738955629147 + 12020965448626459370 + 16805232423094194931 + 1997507046919404818 + 5595490004180117772 + 1095951645234621867 + 12860284606197956951 + 3006371775100816465 + 1538470143029859035 + 10990686529243401550 + 6281441205434422761 + 17258550512580943566 + 11194164693136769663 + 2335662765789290849 + 10849839921452614506 + 1897669995286041117 + 8617385137219123486 + 8645460869578253621 + 10369615344008878278 + 2888935134627221014 + 5467336011823116485 + 11240408713887477073 + 11068369933166174831 + 2336018004865546625 + 13683275116546176522 + 17898535074001754962 + 11926551095232924190 + 771080252708178383 + 15734874452272594342 + 
14580670382273717769 + 9288269866115010444 + 14309215983365080399 + 11496761908746812605 + 8961525670598514400 + 16853212429874044087 + 10054942579164177894 + 10576060529095595255 + 16873169473044436168 + 1443675893467692701 + 9407924856390126071 + 12507545973388336545 + 558271068641131890 + 2580746748845696291 + 14870169209800486600 + 11202834635654405199 + 11952697586612022041 + 9261978407187503553 + 16205747401435211339 + 16672242519483985552 + 13886945689398787220 + 603833500019293658 + 2922843080411118003 + 4652246732152320866 + 7223196482914814041 + 16253505433954232964 + 14253976232801935447 + 16226997956950807212 + 4450147005499392768 + 3466354650467154043 + 6808496856085718575 + 10358772059012940485 + 11345280726708325017 + 13480040316160535043 + 13449439132088294525 + 10621251537002725712 + 1192806390130035985 + 8410598943675863164 + 16751312578565345391 + 7060458441478549181 + 152518779198326854 + 3039106620426714021 + 17336986321833918130 + 183725735365460886 + 14209797355350005566 + 14816322370537402311 + 16556821618827162618 + 17235374509141591592 + 17835941644510261163 + 14588612784430339291 + 8068313868882843310 + 17768933582590528037 + 4950042461813724296 + 18026328174633002387 + 8119673136482509773 + 11732703428066496642 + 3537365755888717942 + 15135730369202322066 + 13292504056513665614 + 6272625651230524222 + 268514710707725237 + 14529922311705877226 + 6643500425815173742 + 6716338243683695232 + 5716977617110786152 + 6712156408612805450 + 1525912030212476300 + 9567315371957424510 + 10411637680289835296 + 4800729070209434321 + 11367040293104977497 + 7943640439633885922 + 15469807117382450721 + 10324804673507392349 + 15925221547802936816 + 7167999032755054539 + 1354111775254115540 + 1717460813215978496 + 8618370596856376408 + 3051351612464660801 + 6105551199112099096 + 13423445288729437282 + 7485714862179804851 + 13222265404217591067 + 3890714424930550010 + 7174557996355104334 + 16255639257226082466 + 9846998936298255423 + 4957378091228311862 + 
12943848520592150037 + 3724799867068708683 + 16269114859884108539 + 16825229767771416315 + 6510460492287673768 + 14967192027093521526 + 15328328101831300588 + 2655464773986738927 + 12542628484892489975 + 7136737261680648566 + 12298070239014657197 + 17749457018469028143 + 8703322118498504148 + 11858518300540788182 + 16422315550142690930 + 7057236125858522587 + 11855958768521896830 + 1820099784752393152 + 11377230916760523184 + 4828101321051089399 + 12122817580452385350 + 1410568880630855231 + 473925628806227527 + 11924802760201520739 + 10815195993672159629 + 18416933339021905553 + 13635561686712821885 + 6602085719454756344 + 16844977202076469900 + 3969723353272636182 + 3690017361867130966 + 11267061006912040077 + 5735229964423980798 + 12492601374916330702 + 16609508867851194412 + 218557311299093094 + 15514942708978823687 + 14362509205129835372 + 6621763864170714851 + 2190047836527548561 + 13515776556400686417 + 15939672995833882174 + 4190502647553322768 + 18350197095287665137 + 12142072380911668446 + 11835920205107194071 + 6047460542694346389 + 9548527731519464189 + 6871053384825100587 + 13174309304976909114 + 14674596572540550665 + 5581986007484473551 + 15875512019988326581 + 109705646516351485 + 2405472162865805900 + 11098438012211838277 + 7863128813077373412 + 2829962756502596006 + 292738908557025467 + 98377248771707637 + 15027326526426909939 + 1775703209604510257 + 17446022770582192536 + 13283479598692569781 + 14939605937449462241 + 16894158543120329973 + 6860131853043902309 + 2354711172704666692 + 15845148575244941941 + 12193174286037583499 + 5325974434102036910 + 5096389031301767464 + 12944400317482224069 + 11335255464450344082 + 799803163615198751 + 5475514946086628725 + 7221132879774069895 + 18176958831074004873 + 3963957934196986259 + 8434438355089997118 + 14656255240245799447 + 3731761213600785903 + 6685621366829263697 + 11550009753214684777 + 7103946463567737246 + 3671219373304085994 + 7419955042653249844 + 9922035446092568568 + 2006380125845666711 + 
8316631826068966766 + 12670092567506251910 + 5327558707454737880 + 11240919251142731612 + 16305984471490690518 + 11192678402063317852 + 1643501093540151377 + 11111010877637715199 + 17035462671773577602 + 9401206117066611704 + 2633714930939286561 + 273159056232267170 + 4281656844854859285 + 18142576061317815247 + 8055210191079505249 + 10871126303031598902 + 10306875951639986748 + 4547307891999956851 + 15522840298197467866 + 1332731856965561457 + 6570773932613147452 + 12951848502757192767 + 12922722574985280976 + 16424645072260574866 + 3762984447633517391 + 7556933799889638005 + 15091724436780259053 + 2427723306863481579 + 16843268906778887667 + 15015892955159394404 + 10595992274582064628 + 10706836884028707877 + 12705818443340198221 + 7745702016801958802 + 5495854982244130436 + 973641572342898943 + 1763608341139817501 + 660663820846972028 + 10652647922238444592 + 14458265409144075459 + 16396479259042290715 + 8606031716325691024 + 3667955518331909013 + 5292424929822597211 + 15820188128445018513 + 14325921925696672881 + 6621598919975238916 + 14029703943182456036 + 15234998596020428232 + 7613918699024695517 + 11231838761953236449 + 2512479936426295253 + 16384525419787925904 + 18214475374852427012 + 8382386715659794940 + 1956248919054143481 + 1760694831535652850 + 1966319316799166847 + 12309400686307874261 + 2896576188504706139 + 11913087413721143561 + 18111604701215337664 + 6194876196726473387 + 14551946472897744395 + 5581480524770951423 + 12572345207593290197 + 1234676834671686471 + 16339239542028683714 + 16130522631905003105 + 1817511339732967397 + 17071198057407753849 + 7472662268947226105 + 7316113408961838889 + 496983171634199246 + 3151169028593442441 + 13747980473339691280 + 688370169755326777 + 408727275135827797 + 3038419650289971474 + 13972411516260428437 + 8336342642802271743 + 6667685301448851857 + 8072840540412251561 + 12828456611399865015 + 17294231528838884571 + 5808822036800100268 + 12837299874346129901 + 15452925259671773506 + 7511393259623248959 + 
497672315332969925
+ 10994819971877203033
+ 1415240208776319937
+ 1717222442472674215
+ 12982588394781304403
+ 15588208479983722154
+ 355527091949200844
+ 6002255854966681112
+ 5450686947651626277
+ 4149089626147111342
+ 17559343624934895925
+ 12696522066263270952
+ 4549886962362211351
+ 3852480633589126307
+ 12845018129691480430
+ 2034028920912254453
+ 12341672278596947655
+ 11417678686531726696
+ 15341772343555876757
+ 815396427324337980
+ 14877065661296547266
+ 15751698599412629242
+ 1778502004910262467
+ 16588204868569276128
+ 16823420647216183103
+ 13062444377797872425
+ 17652483765337306715
+ 6081518504972187439
+ 9088612316959819848
+ 11122433511582148855
diff --git a/common/checksum/test/test_checksum.cpp b/common/checksum/test/test_checksum.cpp
index 6bd03fa8..60ccd660 100644
--- a/common/checksum/test/test_checksum.cpp
+++ b/common/checksum/test/test_checksum.cpp
@@ -1,9 +1,11 @@
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 
 #include "../../../test/ci-tools.h"
 #include "../../../test/gtest.h"
@@ -14,64 +16,89 @@
 #define xstr(arg) str(arg)
 #define str(s) #s
 
-class TestChecksum : public ::testing::Test {
-    virtual void SetUp() {
-        in.open(xstr(DATA_DIR) "checksum.in");
-        if (!in) in.open("checksum.in");
-        ASSERT_TRUE(!!in);
-        uint32_t value;
-        std::string str;
+struct test_case {  /* one test input together with both expected checksums */
+    std::string s;
+    uint64_t crc64ecma;
+    uint32_t crc32c;
+};
 
-        while (getline(in, str)) {
-            std::stringstream ss(str);
-            ss.imbue(std::locale::classic());
-            ss >> value >> str;
-            cases.push_back(std::make_pair(value, str));
-        }
+std::vector cases;  // filled once by setup(), read by every test
 
-        in.close();
+void setup() {  /* Load the test cases: checksum.in supplies "<crc32c> <string>"
+                   pairs, checksum.crc64 supplies the matching CRC-64 values
+                   in the same order. */
+    chdir(xstr(DATA_DIR));  // return value is not checked
+    std::ifstream in("checksum.in"),
+                  in2("checksum.crc64");
+    ASSERT_TRUE(in && in2);
+    uint32_t bytes = 0;  // total length of all loaded strings
+    while (true) {
+        uint32_t crc32;
+        uint64_t crc64;
+        std::string str;
+        in >> crc32 >> str;
+        in2 >> crc64;
+        if (str.empty()) break;  // no string could be read: end of input
+        bytes += str.size();
+        cases.push_back({std::move(str), crc64, crc32});
     }
+    printf("Loaded %d cases, %d
bytes\n", (int)cases.size(), (int)bytes);
+}
 
-    virtual void TearDown() { cases.clear(); }
-
-protected:
-    std::ifstream in;
-    std::vector > cases;
-};
-
-TEST_F(TestChecksum, crc32c_hw) {
-    if (is_crc32c_hw_available()) {
-        auto start = std::chrono::system_clock::now();
-        for (size_t i = 0; i < cases.size(); ++i) {
-            size_t len = cases[i].second.length();
-            const char* data = cases[i].second.c_str();
+typedef uint32_t (*CRC32C)(const uint8_t*, size_t, uint32_t);  // signature of crc32c_hw / crc32c_sw as passed below
+void do_test32(const char* name, CRC32C crc32c) {  /* Check `crc32c` against every
+        loaded case and print the elapsed time under `name`. */
+    auto start = std::chrono::system_clock::now();
+    for (int i = 0; i < 100; ++i)  // the whole data set is run 100 times
+    for (auto& c: cases) {
+        auto crc32 = crc32c((uint8_t*)c.s.data(), c.s.length(), 0);
+        EXPECT_EQ(crc32, c.crc32c);
+    }
+    /* NOTE(review): the duration_cast target type is not visible in this copy;
+       the format string below labels the value as ns. */
+    int time_cost = std::chrono::duration_cast(std::chrono::system_clock::now() - start).count();
+    printf("%s time spent: %dns \n", name, time_cost);
+}
 
-            auto crc = crc32c_hw(reinterpret_cast(data), len, 0);
-            EXPECT_EQ(cases[i].first, crc);
-        }
-        int time_cost = std::chrono::duration_cast(std::chrono::system_clock::now() - start).count();
-        printf("crc32c_hw time spent: %dns \n", time_cost);
-    } else {
+TEST(TestChecksum, crc32c_hw) {
+    if (!is_crc32c_hw_available()) {  // skip (and pass) when no hardware CRC32C is reported
         std::cout << "skip crc32c_hw test on unsupported paltform."
<< std::endl;
+        return;
     }
+    do_test32("crc32c_hw", crc32c_hw);
 }
 
-TEST_F(TestChecksum, crc32c_sw) {
-    auto start = std::chrono::system_clock::now();
-    for (size_t i = 0; i < cases.size(); ++i) {
-        size_t len = cases[i].second.length();
-        const char* data = cases[i].second.c_str();
+TEST(TestChecksum, crc32c_sw) {
+    do_test32("crc32c_sw", crc32c_sw);
+}
 
-        auto crc = crc32c_sw(reinterpret_cast(data), len, 0);
-        EXPECT_EQ(cases[i].first, crc);
+typedef uint64_t (*CRC64ECMA)(const uint8_t*, size_t, uint64_t);  // signature of crc64ecma_hw / crc64ecma_sw as passed below
+void do_test64(const char* name, CRC64ECMA crc64ecma) {  /* 64-bit counterpart of
+        do_test32: check `crc64ecma` against every loaded case, 100 passes,
+        then print the elapsed time under `name`. */
+    auto start = std::chrono::system_clock::now();
+    for (int i = 0; i < 100; ++i)
+    for (auto& c: cases) {
+        auto crc64 = crc64ecma((uint8_t*)c.s.data(), c.s.length(), 0);
+        EXPECT_EQ(crc64, c.crc64ecma);
     }
     int time_cost = std::chrono::duration_cast(std::chrono::system_clock::now() - start).count();
-    printf("crc32c_sw time spent: %dns \n", time_cost);
+    printf("%s time spent: %dns \n", name, time_cost);
+}
+
+TEST(TestChecksum, crc64ecma_hw) {
+    if (!is_crc64ecma_hw_available()) {  // skip (and pass) when crc64ecma_auto is not crc64ecma_hw
+        std::cout << "skip crc64ecma_hw test on unsupported paltform." << std::endl;
+        return;
+    }
+#ifdef __aarch64__
+    do_test64("crc64ecma_hw (using pmull)", crc64ecma_hw);
+#else
+    do_test64("crc64ecma_hw", crc64ecma_hw);
+#endif
+}
+
+TEST(TestChecksum, crc64ecma_sw) {
+    do_test64("crc64ecma_sw", crc64ecma_sw);
 }
 
 int main(int argc, char **argv) {
     if (!photon::is_using_default_engine()) return 0;
     ::testing::InitGoogleTest(&argc, argv);
+    setup();  // load the shared test data once, before any test runs
     return RUN_ALL_TESTS();
 }
\ No newline at end of file
diff --git a/include/photon/common/checksum/crc64ecma.h b/include/photon/common/checksum/crc64ecma.h
new file mode 120000
index 00000000..b90e2806
--- /dev/null
+++ b/include/photon/common/checksum/crc64ecma.h
@@ -0,0 +1 @@
+../../../../common/checksum/crc64ecma.h
\ No newline at end of file