|
129 | 129 |
|
130 | 130 | /**
|
131 | 131 | * @brief A misaligned load can be - trying to fetch eight consecutive bytes from an address
|
132 |
| - * that is not divisble by eight. |
| 132 | + * that is not divisible by eight. |
133 | 133 | *
|
134 | 134 | * Most platforms support it, but there is no industry standard way to check for those.
|
135 | 135 | * This value will mostly affect the performance of the serial (SWAR) backend.
|
|
142 | 142 | * @brief Cache-line width, that will affect the execution of some algorithms,
|
143 | 143 | * like equality checks and relative order computing.
|
144 | 144 | */
|
145 |
| -#ifndef SZ_CACHE_LINE_WIDRTH |
146 |
| -#define SZ_CACHE_LINE_WIDRTH (64) |
| 145 | +#ifndef SZ_CACHE_LINE_WIDTH |
| 146 | +#define SZ_CACHE_LINE_WIDTH (64) |
147 | 147 | #endif
|
148 | 148 |
|
149 | 149 | /*
|
@@ -351,7 +351,7 @@ typedef sz_ordering_t (*sz_order_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t);
|
351 | 351 | * https://github.com/Cyan4973/xxHash
|
352 | 352 | *
|
353 | 353 | * Neither of those functions is cryptographic, unlike MD5, SHA, and BLAKE algorithms.
|
354 |
| - * Most of those are based on the Merkle–Damgård construction, and aren't resistant to |
| 354 | + * Most of those are based on the Merkle-Damgård construction, and aren't resistant to |
355 | 355 | * the length-extension attacks. The current state of the art might be the BLAKE3 algorithm.
|
356 | 356 | * It's resistant to a broad range of attacks, can process 2 bytes per CPU cycle, and comes
|
357 | 357 | * with a very optimized official implementation for C and Rust. It has the same 128-bit
|
@@ -511,7 +511,7 @@ SZ_PUBLIC sz_cptr_t sz_find_last_byte_avx512(sz_cptr_t haystack, sz_size_t h_len
|
511 | 511 |
|
512 | 512 | /**
|
513 | 513 | * @brief Locates first matching substring.
|
514 |
| - * Equivalient to `memmem(haystack, h_length, needle, n_length)` in LibC. |
| 514 | + * Equivalent to `memmem(haystack, h_length, needle, n_length)` in LibC. |
515 | 515 | * Similar to `strstr(haystack, needle)` in LibC, but requires known length.
|
516 | 516 | *
|
517 | 517 | * @param haystack Haystack - the string to search in.
|
@@ -591,8 +591,8 @@ SZ_PUBLIC sz_cptr_t sz_find_last_bounded_regex(sz_cptr_t haystack, sz_size_t h_l
|
591 | 591 | #pragma region String Similarity Measures
|
592 | 592 |
|
593 | 593 | /**
|
594 |
| - * @brief Computes Levenshtein edit-distance between two strings using the Wagner Ficher algorithm. |
595 |
| - * Similar to the Needleman–Wunsch algorithm. Often used in fuzzy string matching. |
| 594 | + * @brief Computes Levenshtein edit-distance between two strings using the Wagner-Fischer algorithm. |
| 595 | + * Similar to the Needleman-Wunsch algorithm. Often used in fuzzy string matching. |
596 | 596 | *
|
597 | 597 | * @param a First string to compare.
|
598 | 598 | * @param a_length Number of bytes in the first string.
|
@@ -628,7 +628,7 @@ SZ_PUBLIC sz_size_t sz_alignment_score_memory_needed(sz_size_t a_length, sz_size
|
628 | 628 | *
|
629 | 629 | * This function is equivalent to the default Levenshtein distance implementation with the ::gap parameter set
|
630 | 630 | * to one, and the ::subs matrix formed of all ones except for the main diagonal, which is zeros.
|
631 |
| - * Unlike the default Levenshtein implementaion, this can't be bounded, as the substitution costs can be both positive |
| 631 | + * Unlike the default Levenshtein implementation, this can't be bounded, as the substitution costs can be both positive |
632 | 632 | * and negative, meaning that the distance isn't monotonically growing as we go through the strings.
|
633 | 633 | *
|
634 | 634 | * @param a First string to compare.
|
@@ -1494,7 +1494,7 @@ SZ_INTERNAL sz_size_t _sz_levenshtein_serial_upto256bytes( //
|
1494 | 1494 | sz_size_t bound, sz_memory_allocator_t const *alloc) {
|
1495 | 1495 |
|
1496 | 1496 | // When dealing with short strings, we won't need to allocate memory on heap,
|
1497 |
| - // as everythin would easily fit on the stack. Let's just make sure that |
| 1497 | + // as everything would easily fit on the stack. Let's just make sure that |
1498 | 1498 | // we use the amount proportional to the number of elements in the shorter string,
|
1499 | 1499 | // not the larger.
|
1500 | 1500 | if (b_length > a_length) return _sz_levenshtein_serial_upto256bytes(b, b_length, a, a_length, bound, alloc);
|
@@ -2065,14 +2065,14 @@ typedef union sz_u512_vec_t {
|
2065 | 2065 | SZ_INTERNAL __mmask64 sz_u64_clamp_mask_until(sz_size_t n) {
|
2066 | 2066 | // The simplest approach to compute this if we know that `n` is below or equal to 64:
|
2067 | 2067 | // return (1ull << n) - 1;
|
2068 |
| - // A slighly more complex approach, if we don't know that `n` is under 64: |
| 2068 | + // A slightly more complex approach, if we don't know that `n` is under 64: |
2069 | 2069 | return _bzhi_u64(0xFFFFFFFFFFFFFFFF, n < 64 ? n : 64);
|
2070 | 2070 | }
|
2071 | 2071 |
|
2072 | 2072 | SZ_INTERNAL __mmask64 sz_u64_mask_until(sz_size_t n) {
|
2073 | 2073 | // The simplest approach to compute this if we know that `n` is below or equal to 64:
|
2074 | 2074 | // return (1ull << n) - 1;
|
2075 |
| - // A slighly more complex approach, if we don't know that `n` is under 64: |
| 2075 | + // A slightly more complex approach, if we don't know that `n` is under 64: |
2076 | 2076 | return _bzhi_u64(0xFFFFFFFFFFFFFFFF, n);
|
2077 | 2077 | }
|
2078 | 2078 |
|
@@ -2442,15 +2442,15 @@ SZ_PUBLIC sz_cptr_t sz_find_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n,
|
2442 | 2442 | (sz_find_t)sz_find_2byte_avx512,
|
2443 | 2443 | (sz_find_t)sz_find_3byte_avx512,
|
2444 | 2444 | (sz_find_t)sz_find_4byte_avx512,
|
2445 |
| - // For longer needles we use a Two-Way heurstic with a follow-up check in-between. |
| 2445 | + // For longer needles we use a Two-Way heuristic with a follow-up check in-between. |
2446 | 2446 | (sz_find_t)sz_find_under66byte_avx512,
|
2447 | 2447 | (sz_find_t)sz_find_over66byte_avx512,
|
2448 | 2448 | };
|
2449 | 2449 |
|
2450 | 2450 | return backends[
|
2451 | 2451 | // For very short strings brute-force SWAR makes sense.
|
2452 | 2452 | (n_length > 1) + (n_length > 2) + (n_length > 3) +
|
2453 |
| - // For longer needles we use a Two-Way heurstic with a follow-up check in-between. |
| 2453 | + // For longer needles we use a Two-Way heuristic with a follow-up check in-between. |
2454 | 2454 | (n_length > 4) + (n_length > 66)](h, h_length, n, n_length);
|
2455 | 2455 | }
|
2456 | 2456 |
|
@@ -2592,15 +2592,15 @@ SZ_PUBLIC sz_cptr_t sz_find_last_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr
|
2592 | 2592 | sz_find_t backends[] = {
|
2593 | 2593 | // For very short strings brute-force SWAR makes sense.
|
2594 | 2594 | (sz_find_t)sz_find_last_byte_avx512,
|
2595 |
| - // For longer needles we use a Two-Way heurstic with a follow-up check in-between. |
| 2595 | + // For longer needles we use a Two-Way heuristic with a follow-up check in-between. |
2596 | 2596 | (sz_find_t)sz_find_last_under66byte_avx512,
|
2597 | 2597 | (sz_find_t)sz_find_last_over66byte_avx512,
|
2598 | 2598 | };
|
2599 | 2599 |
|
2600 | 2600 | return backends[
|
2601 | 2601 | // For very short strings brute-force SWAR makes sense.
|
2602 | 2602 | 0 +
|
2603 |
| - // For longer needles we use a Two-Way heurstic with a follow-up check in-between. |
| 2603 | + // For longer needles we use a Two-Way heuristic with a follow-up check in-between. |
2604 | 2604 | (n_length > 1) + (n_length > 66)](h, h_length, n, n_length);
|
2605 | 2605 | }
|
2606 | 2606 |
|
|
0 commit comments