181
181
#endif
182
182
#endif
183
183
184
- #define sz_assert (condition, message, ...) \
184
+ #define SZ_ASSERT (condition, message, ...) \
185
185
do { \
186
186
if (!(condition)) { \
187
187
fprintf (stderr, " Assertion failed: %s, in file %s, line %d\n " , #condition, __FILE__, __LINE__); \
@@ -258,6 +258,7 @@ SZ_PUBLIC void sz_u8_set_invert(sz_u8_set_t *f) {
258
258
259
259
/** @brief Allocation callback type: receives an `sz_size_t` and an opaque `void *`, returns an `sz_ptr_t`.
 *  NOTE(review): presumably (byte count, user context) -> new buffer; confirm against the allocator struct. */
typedef sz_ptr_t (*sz_memory_allocate_t )(sz_size_t , void *);
260
260
/** @brief Deallocation callback type: receives an `sz_ptr_t`, an `sz_size_t`, and an opaque `void *`.
 *  NOTE(review): presumably (buffer, its byte count, user context); confirm against the allocator struct. */
typedef void (*sz_memory_free_t )(sz_ptr_t , sz_size_t , void *);
261
/** @brief Random-number callback type: receives the opaque generator state and returns an `sz_u64_t`. */
+ typedef sz_u64_t (*sz_random_generator_t )(void *);
261
262
262
263
/* *
263
264
* @brief Some complex pattern matching algorithms may require memory allocations.
@@ -402,7 +403,7 @@ SZ_PUBLIC void sz_toascii(sz_cptr_t text, sz_size_t length, sz_ptr_t result);
402
403
403
404
/* *
404
405
* @brief Generates a random string for a given alphabet, avoiding integer division and modulo operations.
405
- * Similar to `result [i] = alphabet[rand() % size ]`.
406
+ * Similar to `text [i] = alphabet[rand() % cardinality ]`.
406
407
*
407
408
* The modulo operation is expensive, and should be avoided in performance-critical code.
408
409
* We avoid it using small lookup tables and replacing it with a multiplication and shifts, similar to libdivide.
@@ -411,11 +412,14 @@ SZ_PUBLIC void sz_toascii(sz_cptr_t text, sz_size_t length, sz_ptr_t result);
411
412
* - Barrett reduction: https://www.nayuki.io/page/barrett-reduction-algorithm
412
413
* - Lemire's trick: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
413
414
*
414
- * @param text String to be normalized.
415
- * @param length Number of bytes in the string.
416
- * @param result Output string, can point to the same address as ::text.
415
+ * @param alphabet Set of characters to sample from.
416
+ * @param cardinality Number of characters to sample from.
417
+ * @param text Output buffer of ::length bytes, populated with characters sampled from ::alphabet.
418
+ * @param generate Callback producing random numbers given the generator state.
419
+ * @param generator Generator state, can be a pointer to a seed, or a pointer to a random number generator.
417
420
*/
418
- SZ_PUBLIC void sz_generate (sz_cptr_t alphabet, sz_size_t size, sz_ptr_t result, sz_size_t length);
421
+ SZ_PUBLIC void sz_generate (sz_cptr_t alphabet, sz_size_t cardinality, sz_ptr_t text, sz_size_t length,
422
+ sz_random_generator_t generate, void *generator);
419
423
420
424
#pragma endregion
421
425
@@ -1763,6 +1767,9 @@ SZ_INTERNAL sz_u8_t sz_u8_toupper(sz_u8_t c) {
1763
1767
/* *
1764
1768
* @brief Uses two small lookup tables (768 bytes total) to accelerate division by a small
1765
1769
* unsigned integer. Performs two lookups, one multiplication, two shifts, and two accumulations.
1770
+ *
1771
+ * @param divisor Integral value larger than one.
1772
+ * @param number Integral value to divide.
1766
1773
*/
1767
1774
SZ_INTERNAL sz_u8_t sz_u8_divide (sz_u8_t number, sz_u8_t divisor) {
1768
1775
static sz_u16_t multipliers[256 ] = {
@@ -1783,6 +1790,7 @@ SZ_INTERNAL sz_u8_t sz_u8_divide(sz_u8_t number, sz_u8_t divisor) {
1783
1790
9363 , 9030 , 8700 , 8373 , 8049 , 7727 , 7409 , 7093 , 6780 , 6470 , 6162 , 5857 , 5554 , 5254 , 4957 , 4662 ,
1784
1791
4370 , 4080 , 3792 , 3507 , 3224 , 2943 , 2665 , 2388 , 2115 , 1843 , 1573 , 1306 , 1041 , 778 , 517 , 258 ,
1785
1792
};
1793
+ // This table can be avoided using a single addition and counting trailing zeros.
1786
1794
static sz_u8_t shifts[256 ] = {
1787
1795
0 , 0 , 0 , 1 , 1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , //
1788
1796
4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , //
@@ -1802,26 +1810,40 @@ SZ_INTERNAL sz_u8_t sz_u8_divide(sz_u8_t number, sz_u8_t divisor) {
1802
1810
}
1803
1811
1804
1812
/**
 *  @brief  Serial lower-casing: maps every byte of the input through `sz_u8_tolower`.
 *
 *  Each input byte is read before the output byte at the same offset is written,
 *  so passing `result == text` is safe.
 *
 *  @param text     Input bytes.
 *  @param length   Number of bytes to convert.
 *  @param result   Output buffer receiving `length` bytes.
 */
SZ_PUBLIC void sz_tolower_serial(sz_cptr_t text, sz_size_t length, sz_ptr_t result) {
    sz_u8_t const *source = (sz_u8_t const *)text;
    sz_u8_t *target = (sz_u8_t *)result;
    for (sz_size_t i = 0; i != length; ++i) { target[i] = sz_u8_tolower(source[i]); }
}
1809
1818
1810
1819
/**
 *  @brief  Serial upper-casing: maps every byte of the input through `sz_u8_toupper`.
 *
 *  Each input byte is read before the output byte at the same offset is written,
 *  so passing `result == text` is safe.
 *
 *  @param text     Input bytes.
 *  @param length   Number of bytes to convert.
 *  @param result   Output buffer receiving `length` bytes.
 */
SZ_PUBLIC void sz_toupper_serial(sz_cptr_t text, sz_size_t length, sz_ptr_t result) {
    sz_u8_t const *source = (sz_u8_t const *)text;
    sz_u8_t *target = (sz_u8_t *)result;
    for (sz_size_t i = 0; i != length; ++i) { target[i] = sz_u8_toupper(source[i]); }
}
1815
1825
1816
1826
/**
 *  @brief  Serial ASCII projection: copies every input byte with its top bit cleared (`byte & 0x7F`).
 *
 *  Each input byte is read before the output byte at the same offset is written,
 *  so passing `result == text` is safe.
 *
 *  @param text     Input bytes.
 *  @param length   Number of bytes to convert.
 *  @param result   Output buffer receiving `length` bytes.
 */
SZ_PUBLIC void sz_toascii_serial(sz_cptr_t text, sz_size_t length, sz_ptr_t result) {
    sz_u8_t const *source = (sz_u8_t const *)text;
    sz_u8_t *target = (sz_u8_t *)result;
    for (sz_size_t i = 0; i != length; ++i) { target[i] = source[i] & 0x7F; }
}
1819
1832
1820
- SZ_PUBLIC void sz_toascii_serial (sz_cptr_t text, sz_size_t length, sz_ptr_t result) {
1821
- for (sz_cptr_t end = text + length; text != end; ++text, ++result) { *result = *(sz_u8_t const *)text & 0x7F ; }
1822
- }
1833
+ SZ_PUBLIC void sz_generate (sz_cptr_t alphabet, sz_size_t alphabet_size, sz_ptr_t result, sz_size_t result_length,
1834
+ sz_random_generator_t generator, void *generator_user_data) {
1835
+
1836
+ SZ_ASSERT (alphabet_size > 0 && alphabet_size <= 256 , " Inadequate alphabet size" );
1823
1837
1824
- SZ_PUBLIC void sz_generate (sz_cptr_t alphabet, sz_size_t size, sz_ptr_t result, sz_size_t length) {}
1838
+ if (alphabet_size == 1 )
1839
+ for (sz_cptr_t end = result + result_length; result != end; ++result) *result = *alphabet;
1840
+
1841
+ else {
1842
+ SZ_ASSERT (generator, " Expects a valid random generator" );
1843
+ for (sz_cptr_t end = result + result_length; result != end; ++result)
1844
+ *result = alphabet[sz_u8_divide (generator (generator_user_data) & 0xFF , alphabet_size)];
1845
+ }
1846
+ }
1825
1847
1826
1848
#pragma endregion
1827
1849
@@ -2641,8 +2663,6 @@ SZ_PUBLIC sz_cptr_t sz_find_last_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr
2641
2663
*/
2642
2664
#pragma region Compile-Time Dispatching
2643
2665
2644
- #include < stringzilla/stringzilla.h>
2645
-
2646
2666
/** @brief Compile-time dispatched entry point: forwards both arguments to `sz_hash_serial` and returns its result. */
SZ_PUBLIC sz_u64_t sz_hash (sz_cptr_t text, sz_size_t length) { return sz_hash_serial (text, length); }
2647
2667
2648
2668
SZ_PUBLIC sz_ordering_t sz_order (sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length) {
0 commit comments