Skip to content

Commit 850e4e8

Browse files
committed
Add: Look-Up Table transforms
The new `sz_look_up_transform` API implements a 256-byte lookup-table transform, in both serial code and AVX-512, that can significantly accelerate text and image processing. The AVX-512 implementation reaches 18 GB/s on an Intel Sapphire Rapids CPU, while the serial code stays around 3 GB/s for large files.
1 parent bba72a6 commit 850e4e8

File tree

6 files changed

+388
-27
lines changed

6 files changed

+388
-27
lines changed

c/lib.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ typedef struct sz_implementations_t {
119119
sz_move_t copy;
120120
sz_move_t move;
121121
sz_fill_t fill;
122+
sz_look_up_transform_t look_up_transform;
122123

123124
sz_find_byte_t find_byte;
124125
sz_find_byte_t rfind_byte;
@@ -153,6 +154,7 @@ static void sz_dispatch_table_init(void) {
153154
impl->copy = sz_copy_serial;
154155
impl->move = sz_move_serial;
155156
impl->fill = sz_fill_serial;
157+
impl->look_up_transform = sz_look_up_transform_serial;
156158

157159
impl->find = sz_find_serial;
158160
impl->rfind = sz_rfind_serial;
@@ -205,6 +207,7 @@ static void sz_dispatch_table_init(void) {
205207
impl->find_from_set = sz_find_charset_avx512;
206208
impl->rfind_from_set = sz_rfind_charset_avx512;
207209
impl->alignment_score = sz_alignment_score_avx512;
210+
impl->look_up_transform = sz_look_up_transform_avx512;
208211
}
209212
#endif
210213

@@ -261,6 +264,10 @@ SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
261264
sz_dispatch_table.fill(target, length, value);
262265
}
263266

267+
/**
 *  Run-time dispatched variant of the look-up transform: forwards all four arguments, unchanged,
 *  to the `look_up_transform` function pointer currently stored in `sz_dispatch_table`.
 */
SZ_DYNAMIC void sz_look_up_transform(sz_cptr_t source, sz_size_t length, sz_cptr_t lut, sz_ptr_t target) {
268+
sz_dispatch_table.look_up_transform(source, length, lut, target);
269+
}
270+
264271
SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
265272
return sz_dispatch_table.find_byte(haystack, h_length, needle);
266273
}

include/stringzilla/stringzilla.h

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,25 @@ SZ_DYNAMIC sz_ordering_t sz_order(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b,
453453
/** @copydoc sz_order */
454454
SZ_PUBLIC sz_ordering_t sz_order_serial(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length);
455455

456+
/**
457+
* @brief Look Up Table @b (LUT) transformation of a string. Equivalent to `for (char & c : text) c = lut[c]`.
458+
*
459+
* Can be used to implement some form of string normalization, partially masking punctuation marks,
460+
* or converting between different character sets, like uppercase or lowercase. Surprisingly, also has
461+
* broad implications in image processing, where image channel transformations are often done using LUTs.
462+
*
463+
* @param text String to be normalized.
464+
* @param length Number of bytes in the string.
465+
* @param lut Look Up Table to apply. Must be exactly @b 256 bytes long.
466+
* @param result Output string, can point to the same address as ::text.
467+
*/
468+
SZ_DYNAMIC void sz_look_up_transform(sz_cptr_t text, sz_size_t length, sz_cptr_t lut, sz_ptr_t result);
469+
470+
typedef void (*sz_look_up_transform_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_ptr_t);
471+
472+
/** @copydoc sz_look_up_transform */
473+
SZ_PUBLIC void sz_look_up_transform_serial(sz_cptr_t text, sz_size_t length, sz_cptr_t lut, sz_ptr_t result);
474+
456475
/**
457476
* @brief Equivalent to `for (char & c : text) c = tolower(c)`.
458477
*
@@ -1169,6 +1188,8 @@ SZ_PUBLIC void sz_copy_avx512(sz_ptr_t target, sz_cptr_t source, sz_size_t lengt
11691188
SZ_PUBLIC void sz_move_avx512(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
11701189
/** @copydoc sz_fill */
11711190
SZ_PUBLIC void sz_fill_avx512(sz_ptr_t target, sz_size_t length, sz_u8_t value);
1191+
/** @copydoc sz_look_up_tranform */
1192+
SZ_PUBLIC void sz_look_up_tranform_avx512(sz_cptr_t source, sz_size_t length, sz_cptr_t table, sz_ptr_t target);
11721193
/** @copydoc sz_find_byte */
11731194
SZ_PUBLIC sz_cptr_t sz_find_byte_avx512(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
11741195
/** @copydoc sz_rfind_byte */
@@ -3095,6 +3116,14 @@ SZ_INTERNAL sz_u8_t sz_u8_divide(sz_u8_t number, sz_u8_t divisor) {
30953116
return (sz_u8_t)(t >> shift);
30963117
}
30973118

3119+
/**
 *  Serial, byte-by-byte look-up transform: for each of the `length` input bytes it writes
 *  `lut[text[i]]` into `result[i]`. All three buffers are reinterpreted as unsigned bytes,
 *  so input bytes with the top bit set index the upper half of the table.
 */
SZ_PUBLIC void sz_look_up_transform_serial(sz_cptr_t text, sz_size_t length, sz_cptr_t lut, sz_ptr_t result) {
    sz_u8_t const *table = (sz_u8_t const *)lut;
    sz_u8_t const *input = (sz_u8_t const *)text;
    sz_u8_t *output = (sz_u8_t *)result;
    // Every input byte is read before the matching output byte is written.
    while (length--) *output++ = table[*input++];
}
3126+
30983127
SZ_PUBLIC void sz_tolower_serial(sz_cptr_t text, sz_size_t length, sz_ptr_t result) {
30993128
sz_u8_t *unsigned_result = (sz_u8_t *)result;
31003129
sz_u8_t const *unsigned_text = (sz_u8_t const *)text;
@@ -5106,6 +5135,108 @@ SZ_PUBLIC void sz_hashes_avx512(sz_cptr_t start, sz_size_t length, sz_size_t win
51065135
#pragma clang attribute push(__attribute__((target("avx,avx512f,avx512vl,avx512bw,avx512vbmi,avx512vbmi2,bmi,bmi2"))), \
51075136
apply_to = function)
51085137

5138+
SZ_PUBLIC void sz_look_up_transform_avx512(sz_cptr_t source, sz_size_t length, sz_cptr_t lut, sz_ptr_t target) {
5139+
5140+
// If the input is tiny (especially smaller than the look-up table itself), we may end up paying
5141+
// more for organizing the SIMD registers and changing the CPU state, than for the actual computation.
5142+
// But if at least 3 cache lines are touched, the AVX-512 implementation should be faster.
5143+
if (length <= 128) {
5144+
sz_look_up_transform_serial(source, length, lut, target);
5145+
return;
5146+
}
5147+
5148+
// When the buffer is over 64 bytes, it's guaranteed to touch at least two cache lines - the head and tail,
5149+
// and may include more cache-lines in-between. Knowing this, we can avoid expensive unaligned stores
5150+
// by computing 2 masks - for the head and tail, using masked stores for the head and tail, and unmasked
5151+
// for the body.
5152+
sz_size_t head_length = (64 - ((sz_size_t)target % 64)) % 64; // 63 or less.
5153+
sz_size_t tail_length = (sz_size_t)(target + length) % 64; // 63 or less.
5154+
__mmask64 head_mask = _sz_u64_mask_until(head_length);
5155+
__mmask64 tail_mask = _sz_u64_mask_until(tail_length);
5156+
5157+
// We need to pull the lookup table into 4x ZMM registers.
5158+
// We can use `vpermi2b` instruction to perform the look in two ZMM registers with `_mm512_permutex2var_epi8`
5159+
// intrinsics, but it has a 6-cycle latency on Sapphire Rapids and requires AVX512-VBMI. Assuming we need to
5160+
// operate on 4 registers, it might be cleaner to use 2x separate `_mm512_permutexvar_epi8` calls.
5161+
// Combining the results with 2x `_mm512_test_epi8_mask` and 3x blends afterwards.
5162+
//
5163+
// - `_mm512_mask_blend_epi8` - 1 cycle latency, and generally 2x can run in parallel.
5164+
// - `_mm512_test_epi8_mask` - 3 cycles latency, same as most comparison functions in AVX-512.
5165+
sz_u512_vec_t lut_0_to_63_vec, lut_64_to_127_vec, lut_128_to_191_vec, lut_192_to_255_vec;
5166+
lut_0_to_63_vec.zmm = _mm512_loadu_si512((lut));
5167+
lut_64_to_127_vec.zmm = _mm512_loadu_si512((lut + 64));
5168+
lut_128_to_191_vec.zmm = _mm512_loadu_si512((lut + 128));
5169+
lut_192_to_255_vec.zmm = _mm512_loadu_si512((lut + 192));
5170+
5171+
sz_u512_vec_t first_bit_vec, second_bit_vec;
5172+
first_bit_vec.zmm = _mm512_set1_epi8((char)0x80);
5173+
second_bit_vec.zmm = _mm512_set1_epi8((char)0x40);
5174+
5175+
__mmask64 first_bit_mask, second_bit_mask;
5176+
sz_u512_vec_t source_vec;
5177+
// If the top bit is set in each word of `source_vec`, than we use `lookup_128_to_191_vec` or
5178+
// `lookup_192_to_255_vec`. If the second bit is set, we use `lookup_64_to_127_vec` or `lookup_192_to_255_vec`.
5179+
sz_u512_vec_t lookup_0_to_63_vec, lookup_64_to_127_vec, lookup_128_to_191_vec, lookup_192_to_255_vec;
5180+
sz_u512_vec_t blended_0_to_127_vec, blended_128_to_255_vec, blended_0_to_255_vec;
5181+
5182+
// Handling the head.
5183+
if (head_length) {
5184+
source_vec.zmm = _mm512_maskz_loadu_epi8(head_mask, source);
5185+
lookup_0_to_63_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_0_to_63_vec.zmm);
5186+
lookup_64_to_127_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_64_to_127_vec.zmm);
5187+
lookup_128_to_191_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_128_to_191_vec.zmm);
5188+
lookup_192_to_255_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_192_to_255_vec.zmm);
5189+
first_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, first_bit_vec.zmm);
5190+
second_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, second_bit_vec.zmm);
5191+
blended_0_to_127_vec.zmm =
5192+
_mm512_mask_blend_epi8(second_bit_mask, lookup_0_to_63_vec.zmm, lookup_64_to_127_vec.zmm);
5193+
blended_128_to_255_vec.zmm =
5194+
_mm512_mask_blend_epi8(second_bit_mask, lookup_128_to_191_vec.zmm, lookup_192_to_255_vec.zmm);
5195+
blended_0_to_255_vec.zmm =
5196+
_mm512_mask_blend_epi8(first_bit_mask, blended_0_to_127_vec.zmm, blended_128_to_255_vec.zmm);
5197+
_mm512_mask_storeu_epi8(target, head_mask, blended_0_to_255_vec.zmm);
5198+
source += head_length, target += head_length, length -= head_length;
5199+
}
5200+
5201+
// Handling the body in 64-byte chunks aligned to cache-line boundaries with respect to `target`.
5202+
while (length >= 64) {
5203+
source_vec.zmm = _mm512_loadu_si512(source);
5204+
lookup_0_to_63_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_0_to_63_vec.zmm);
5205+
lookup_64_to_127_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_64_to_127_vec.zmm);
5206+
lookup_128_to_191_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_128_to_191_vec.zmm);
5207+
lookup_192_to_255_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_192_to_255_vec.zmm);
5208+
first_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, first_bit_vec.zmm);
5209+
second_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, second_bit_vec.zmm);
5210+
blended_0_to_127_vec.zmm =
5211+
_mm512_mask_blend_epi8(second_bit_mask, lookup_0_to_63_vec.zmm, lookup_64_to_127_vec.zmm);
5212+
blended_128_to_255_vec.zmm =
5213+
_mm512_mask_blend_epi8(second_bit_mask, lookup_128_to_191_vec.zmm, lookup_192_to_255_vec.zmm);
5214+
blended_0_to_255_vec.zmm =
5215+
_mm512_mask_blend_epi8(first_bit_mask, blended_0_to_127_vec.zmm, blended_128_to_255_vec.zmm);
5216+
_mm512_store_si512(target, blended_0_to_255_vec.zmm); //! Aligned store, our main weapon!
5217+
source += 64, target += 64, length -= 64;
5218+
}
5219+
5220+
// Handling the tail.
5221+
if (tail_length) {
5222+
source_vec.zmm = _mm512_maskz_loadu_epi8(tail_mask, source);
5223+
lookup_0_to_63_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_0_to_63_vec.zmm);
5224+
lookup_64_to_127_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_64_to_127_vec.zmm);
5225+
lookup_128_to_191_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_128_to_191_vec.zmm);
5226+
lookup_192_to_255_vec.zmm = _mm512_permutexvar_epi8(source_vec.zmm, lut_192_to_255_vec.zmm);
5227+
first_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, first_bit_vec.zmm);
5228+
second_bit_mask = _mm512_test_epi8_mask(source_vec.zmm, second_bit_vec.zmm);
5229+
blended_0_to_127_vec.zmm =
5230+
_mm512_mask_blend_epi8(second_bit_mask, lookup_0_to_63_vec.zmm, lookup_64_to_127_vec.zmm);
5231+
blended_128_to_255_vec.zmm =
5232+
_mm512_mask_blend_epi8(second_bit_mask, lookup_128_to_191_vec.zmm, lookup_192_to_255_vec.zmm);
5233+
blended_0_to_255_vec.zmm =
5234+
_mm512_mask_blend_epi8(first_bit_mask, blended_0_to_127_vec.zmm, blended_128_to_255_vec.zmm);
5235+
_mm512_mask_storeu_epi8(target, tail_mask, blended_0_to_255_vec.zmm);
5236+
source += tail_length, target += tail_length, length -= tail_length;
5237+
}
5238+
}
5239+
51095240
SZ_PUBLIC sz_cptr_t sz_find_charset_avx512(sz_cptr_t text, sz_size_t length, sz_charset_t const *filter) {
51105241

51115242
// Before initializing the AVX-512 vectors, we may want to run the sequential code for the first few bytes.
@@ -5920,6 +6051,14 @@ SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
59206051
#endif
59216052
}
59226053

6054+
/**
 *  Compile-time dispatched variant of the look-up transform: calls the AVX-512 implementation
 *  when `SZ_USE_X86_AVX512` is enabled at build time, and the serial implementation otherwise.
 */
SZ_DYNAMIC void sz_look_up_transform(sz_cptr_t source, sz_size_t length, sz_cptr_t lut, sz_ptr_t target) {
6055+
#if SZ_USE_X86_AVX512
6056+
sz_look_up_transform_avx512(source, length, lut, target);
6057+
#else
6058+
sz_look_up_transform_serial(source, length, lut, target);
6059+
#endif
6060+
}
6061+
59236062
SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
59246063
#if SZ_USE_X86_AVX512
59256064
return sz_find_byte_avx512(haystack, h_length, needle);

include/stringzilla/stringzilla.hpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,55 @@ inline char_set whitespaces_set() { return char_set {whitespaces()}; }
340340
inline char_set newlines_set() { return char_set {newlines()}; }
341341
inline char_set base64_set() { return char_set {base64()}; }
342342

343+
/**
344+
* @brief A look-up table for character replacement operations.
345+
* Exactly 256 bytes for byte-to-byte replacement.
346+
* ! For larger character types should be allocated on the heap.
347+
*/
348+
template <typename char_type_ = char>
349+
class basic_look_up_table {
350+
static_assert(sizeof(char_type_) == 1 || sizeof(char_type_) == 2 || sizeof(char_type_) == 4,
351+
"Character type must be 1, 2, or 4 bytes long");
352+
static constexpr std::size_t size_k = sizeof(char_type_) == 1 ? 256ul
353+
: sizeof(char_type_) == 2 ? 65536ul
354+
: 4294967296ul;
355+
static constexpr std::size_t bytes_k = size_k * sizeof(char_type_);
356+
using usnigned_type_ = typename std::make_unsigned<char_type_>::type;
357+
358+
char_type_ lut_[size_k];
359+
360+
public:
361+
using char_type = char_type_;
362+
363+
basic_look_up_table() noexcept { memset(&lut_[0], 0, bytes_k); }
364+
explicit basic_look_up_table(char_type const (&chars)[size_k]) noexcept { memcpy(&lut_[0], chars, bytes_k); }
365+
basic_look_up_table(std::array<char_type, size_k> const &chars) noexcept {
366+
memcpy(&lut_[0], chars.data(), bytes_k);
367+
}
368+
369+
basic_look_up_table(basic_look_up_table const &other) noexcept { memcpy(&lut_[0], other.lut_, bytes_k); }
370+
basic_look_up_table &operator=(basic_look_up_table const &other) noexcept {
371+
memcpy(&lut_[0], other.lut_, bytes_k);
372+
return *this;
373+
}
374+
375+
/**
376+
* @brief Creates a look-up table with a one-to-one mapping of characters to themselves.
377+
* Similar to `std::iota` filling, but properly handles signed integer casts.
378+
*/
379+
static basic_look_up_table identity() noexcept {
380+
basic_look_up_table result;
381+
for (std::size_t i = 0; i < size_k; ++i) { result.lut_[i] = static_cast<usnigned_type_>(i); }
382+
return result;
383+
}
384+
385+
inline sz_cptr_t raw() const noexcept { return reinterpret_cast<sz_cptr_t>(&lut_[0]); }
386+
inline char_type &operator[](char_type c) noexcept { return lut_[sz_bitcast(usnigned_type_, c)]; }
387+
inline char_type const &operator[](char_type c) const noexcept { return lut_[sz_bitcast(usnigned_type_, c)]; }
388+
};
389+
390+
using look_up_table = basic_look_up_table<char>;
391+
343392
#pragma endregion
344393

345394
#pragma region Ranges of Search Matches
@@ -3355,6 +3404,24 @@ class basic_string {
33553404
return try_replace_all_<char_set>(pattern, replacement);
33563405
}
33573406

3407+
/**
 * @brief Replaces ( @b in-place ) all characters in the string using the provided lookup table.
 * @param table Lookup table to apply to every character of this string.
 * @return Reference to this string.
 */
3410+
basic_string &transform(look_up_table const &table) noexcept {
3411+
// Reuses the overload that writes into an external buffer, passing this string's own `data()` as the output.
transform(table, data());
3412+
return *this;
3413+
}
3414+
3415+
/**
 * @brief Maps all characters in the current string into another buffer using the provided lookup table.
 * @param table Lookup table to apply to every character of this string.
 * @param output Destination buffer; presumably must have room for as many bytes as the string holds,
 *               as nothing here checks its capacity - TODO confirm against callers.
 */
3418+
void transform(look_up_table const &table, pointer output) const noexcept {
3419+
sz_ptr_t start;
3420+
sz_size_t length;
3421+
// Fetch the start pointer and length of the string's current contents.
sz_string_range(&string_, &start, &length);
3422+
sz_look_up_transform((sz_cptr_t)start, (sz_size_t)length, (sz_cptr_t)table.raw(), (sz_ptr_t)output);
3423+
}
3424+
33583425
private:
33593426
template <typename pattern_type>
33603427
bool try_replace_all_(pattern_type pattern, string_view replacement) noexcept;
@@ -3797,6 +3864,26 @@ void randomize(basic_string_slice<char_type_> string, generator_type_ &generator
37973864
sz_generate(alphabet.data(), alphabet.size(), string.data(), string.size(), generator_callback, &generator);
37983865
}
37993866

3867+
/**
 * @brief Replaces ( @b in-place ) all characters in the string using the provided lookup table.
 * @param string Slice to be overwritten; it serves as both the input and the output.
 * @param table Lookup table to apply to every character of the slice.
 */
3870+
template <typename char_type_>
3871+
void transform(basic_string_slice<char_type_> string, basic_look_up_table<char_type_> const &table) noexcept {
3872+
static_assert(sizeof(char_type_) == 1, "The character type must be 1 byte long.");
3873+
// The same buffer is passed as source and target.
sz_look_up_transform((sz_cptr_t)string.data(), (sz_size_t)string.size(), (sz_cptr_t)table.raw(),
3874+
(sz_ptr_t)string.data());
3875+
}
3876+
3877+
/**
 * @brief Maps all characters of the source slice into another buffer using the provided lookup table.
 * @param source Read-only slice to be transformed.
 * @param table Lookup table to apply to every character of the slice.
 * @param target Destination buffer; presumably must have room for `source.size()` characters,
 *               as nothing here checks its capacity - TODO confirm against callers.
 */
3880+
template <typename char_type_>
3881+
void transform(basic_string_slice<char_type_ const> source, basic_look_up_table<char_type_> const &table,
3882+
char_type_ *target) noexcept {
3883+
static_assert(sizeof(char_type_) == 1, "The character type must be 1 byte long.");
3884+
sz_look_up_transform((sz_cptr_t)source.data(), (sz_size_t)source.size(), (sz_cptr_t)table.raw(), (sz_ptr_t)target);
3885+
}
3886+
38003887
/**
38013888
* @brief Overwrites the string slice with random characters from the given alphabet
38023889
* using `std::rand` as the random generator.

python/lib.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1927,6 +1927,53 @@ static PyObject *Str_endswith(PyObject *self, PyObject *args, PyObject *kwargs)
19271927
else { Py_RETURN_FALSE; }
19281928
}
19291929

1930+
/**
 *  @brief  Implements `translate`: applies a 256-byte Look-Up Table to a byte-string, in-place.
 *
 *  Callable both as a method, `text.translate(lut[, start[, end]])`, and as a module-level
 *  function, `translate(text, lut[, start[, end]])`. The optional `start` and `end` select the
 *  sub-range to transform and follow Python slicing rules: negative values count from the end
 *  of the string, and out-of-range values are clamped to its bounds.
 *
 *  @return `None` on success. `NULL` with a `TypeError` set for a wrong number or type of
 *          arguments, or with a `ValueError` set if the table is not exactly 256 bytes long.
 *  @note   Keyword arguments are accepted by the signature, but are not interpreted.
 */
static PyObject *Str_translate(PyObject *self, PyObject *args, PyObject *kwargs) {
    (void)kwargs;

    // When invoked as a method, the string is `self`; otherwise it is the first positional argument.
    int is_member = self != NULL && PyObject_TypeCheck(self, &StrType);
    Py_ssize_t nargs = PyTuple_Size(args);
    if (nargs < !is_member + 1 || nargs > !is_member + 3) {
        PyErr_SetString(PyExc_TypeError, "Invalid number of arguments");
        return NULL;
    }

    PyObject *str_obj = is_member ? self : PyTuple_GET_ITEM(args, 0);
    PyObject *look_up_table_obj = PyTuple_GET_ITEM(args, !is_member);
    PyObject *start_obj = nargs > !is_member + 1 ? PyTuple_GET_ITEM(args, !is_member + 1) : NULL;
    PyObject *end_obj = nargs > !is_member + 2 ? PyTuple_GET_ITEM(args, !is_member + 2) : NULL;

    // Optional start and end arguments
    Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

    if (start_obj && ((start = PyLong_AsSsize_t(start_obj)) == -1 && PyErr_Occurred())) {
        PyErr_SetString(PyExc_TypeError, "start must be an integer");
        return NULL;
    }

    if (end_obj && ((end = PyLong_AsSsize_t(end_obj)) == -1 && PyErr_Occurred())) {
        PyErr_SetString(PyExc_TypeError, "end must be an integer");
        return NULL;
    }

    sz_string_view_t str, look_up_table;
    if (!export_string_like(str_obj, &str.start, &str.length) ||
        !export_string_like(look_up_table_obj, &look_up_table.start, &look_up_table.length)) {
        PyErr_SetString(PyExc_TypeError, "Both arguments must be string-like");
        return NULL;
    }

    if (look_up_table.length != 256) {
        PyErr_SetString(PyExc_ValueError, "The look-up table must be exactly 256 bytes long");
        return NULL;
    }

    // Normalize `start` and `end` into `0 <= start <= end <= length`, so that neither the pointer
    // offset nor the unsigned length computed below can ever leave the exported buffer.
    Py_ssize_t const length = (Py_ssize_t)str.length;
    if (start < 0) {
        start += length;
        if (start < 0) start = 0;
    }
    else if (start > length) { start = length; }
    if (end < 0) {
        end += length;
        if (end < 0) end = 0;
    }
    else if (end > length) { end = length; }
    if (end < start) end = start;

    // The transformation is in-place: the exported buffer is both the source and the target.
    // NOTE(review): this writes through the read-only view returned by `export_string_like` -
    // confirm that every string-like object accepted here is safe to mutate.
    sz_look_up_transform(str.start + start, (sz_size_t)(end - start), look_up_table.start,
                         (sz_ptr_t)(str.start + start));

    // `Py_RETURN_NONE` increments the reference count of `None` before handing it to the caller.
    Py_RETURN_NONE;
}
1976+
19301977
static PyObject *Str_find_first_of(PyObject *self, PyObject *args, PyObject *kwargs) {
19311978
Py_ssize_t signed_offset;
19321979
sz_string_view_t text;
@@ -2438,6 +2485,7 @@ static PyMethodDef Str_methods[] = {
24382485
{"splitlines", Str_splitlines, SZ_METHOD_FLAGS, "Split a string by line breaks."},
24392486
{"startswith", Str_startswith, SZ_METHOD_FLAGS, "Check if a string starts with a given prefix."},
24402487
{"endswith", Str_endswith, SZ_METHOD_FLAGS, "Check if a string ends with a given suffix."},
2488+
{"translate", Str_translate, SZ_METHOD_FLAGS, "Look-Up Table in-place transformation of a byte-string."},
24412489
{"decode", Str_decode, SZ_METHOD_FLAGS, "Decode the bytes into `str` with a given encoding"},
24422490

24432491
// Bidirectional operations
@@ -3139,6 +3187,7 @@ static PyMethodDef stringzilla_methods[] = {
31393187
{"splitlines", Str_splitlines, SZ_METHOD_FLAGS, "Split a string by line breaks."},
31403188
{"startswith", Str_startswith, SZ_METHOD_FLAGS, "Check if a string starts with a given prefix."},
31413189
{"endswith", Str_endswith, SZ_METHOD_FLAGS, "Check if a string ends with a given suffix."},
3190+
{"translate", Str_translate, SZ_METHOD_FLAGS, "Look-Up Table in-place transformation of a byte-string."},
31423191
{"decode", Str_decode, SZ_METHOD_FLAGS, "Decode the bytes into `str` with a given encoding"},
31433192

31443193
// Bidirectional operations

0 commit comments

Comments
 (0)