Skip to content

Commit 8c2d6fe

Browse files
committed
Docs: Spelling
1 parent 29bd41f commit 8c2d6fe

File tree

2 files changed

+146
-133
lines changed

2 files changed

+146
-133
lines changed

.vscode/settings.json

Lines changed: 131 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -1,137 +1,32 @@
11
{
2+
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
23
// This may cause overheating.
34
// https://github.com/microsoft/vscode-cpptools/issues/1816
45
"C_Cpp.workspaceParsingPriority": "low",
5-
"cmake.configureOnOpen": true,
66
"cmake.buildDirectory": "${workspaceRoot}/build",
7-
"cmake.sourceDirectory": "${workspaceRoot}",
8-
"editor.rulers": [
9-
120
10-
],
7+
"cmake.configureOnOpen": true,
118
// https://github.com/microsoft/vscode-cpptools/issues/2456#issuecomment-439295153
129
"cmake.debugConfig": {
13-
"stopAtEntry": false,
14-
"MIMode": "lldb",
1510
"logging": {
16-
"trace": true,
1711
"engineLogging": true,
12+
"trace": true,
1813
"traceResponse": true
19-
}
20-
},
21-
"editor.formatOnSave": true,
22-
"python.pythonPath": "/Users/av/miniconda3/bin/python",
23-
"files.associations": {
24-
"string_view": "cpp",
25-
"array": "cpp",
26-
"atomic": "cpp",
27-
"bit": "cpp",
28-
"*.tcc": "cpp",
29-
"cctype": "cpp",
30-
"clocale": "cpp",
31-
"cmath": "cpp",
32-
"complex": "cpp",
33-
"cstdarg": "cpp",
34-
"cstddef": "cpp",
35-
"cstdint": "cpp",
36-
"cstdio": "cpp",
37-
"cstdlib": "cpp",
38-
"cwchar": "cpp",
39-
"cwctype": "cpp",
40-
"deque": "cpp",
41-
"set": "cpp",
42-
"unordered_map": "cpp",
43-
"unordered_set": "cpp",
44-
"vector": "cpp",
45-
"exception": "cpp",
46-
"algorithm": "cpp",
47-
"functional": "cpp",
48-
"iterator": "cpp",
49-
"memory": "cpp",
50-
"memory_resource": "cpp",
51-
"numeric": "cpp",
52-
"optional": "cpp",
53-
"random": "cpp",
54-
"string": "cpp",
55-
"system_error": "cpp",
56-
"tuple": "cpp",
57-
"type_traits": "cpp",
58-
"utility": "cpp",
59-
"fstream": "cpp",
60-
"initializer_list": "cpp",
61-
"iosfwd": "cpp",
62-
"istream": "cpp",
63-
"limits": "cpp",
64-
"new": "cpp",
65-
"ostream": "cpp",
66-
"sstream": "cpp",
67-
"stdexcept": "cpp",
68-
"streambuf": "cpp",
69-
"typeinfo": "cpp",
70-
"map": "cpp",
71-
"__bit_reference": "cpp",
72-
"__config": "cpp",
73-
"__debug": "cpp",
74-
"__errc": "cpp",
75-
"__functional_base": "cpp",
76-
"__hash_table": "cpp",
77-
"__locale": "cpp",
78-
"__mutex_base": "cpp",
79-
"__node_handle": "cpp",
80-
"__nullptr": "cpp",
81-
"__split_buffer": "cpp",
82-
"__string": "cpp",
83-
"__threading_support": "cpp",
84-
"__tree": "cpp",
85-
"__tuple": "cpp",
86-
"bitset": "cpp",
87-
"chrono": "cpp",
88-
"codecvt": "cpp",
89-
"condition_variable": "cpp",
90-
"cstring": "cpp",
91-
"ctime": "cpp",
92-
"forward_list": "cpp",
93-
"iomanip": "cpp",
94-
"ios": "cpp",
95-
"iostream": "cpp",
96-
"locale": "cpp",
97-
"mutex": "cpp",
98-
"queue": "cpp",
99-
"ratio": "cpp",
100-
"stack": "cpp",
101-
"thread": "cpp",
102-
"typeindex": "cpp",
103-
"cinttypes": "cpp",
104-
"__bits": "cpp",
105-
"any": "cpp",
106-
"compare": "cpp",
107-
"concepts": "cpp",
108-
"csignal": "cpp",
109-
"future": "cpp",
110-
"list": "cpp",
111-
"numbers": "cpp",
112-
"semaphore": "cpp",
113-
"span": "cpp",
114-
"variant": "cpp",
115-
"source_location": "cpp",
116-
"stop_token": "cpp",
117-
"__verbose_abort": "cpp",
118-
"strstream": "cpp",
119-
"filesystem": "cpp",
120-
"stringzilla.h": "c",
121-
"__memory": "c",
122-
"charconv": "c",
123-
"format": "cpp",
124-
"shared_mutex": "cpp"
14+
},
15+
"MIMode": "lldb",
16+
"stopAtEntry": false
12517
},
126-
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
18+
"cmake.sourceDirectory": "${workspaceRoot}",
12719
"cSpell.words": [
128-
"abababab",
12920
"allowoverlap",
13021
"Apostolico",
22+
"Baeza",
23+
"Gonnet",
24+
"Galil",
13125
"ashvardanian",
13226
"basicsize",
13327
"bigram",
13428
"bioinformatics",
29+
"cheminformatics",
13530
"Bitap",
13631
"cibuildwheel",
13732
"endregion",
@@ -184,6 +79,124 @@
18479
"Vardanian",
18580
"vectorcallfunc",
18681
"XDECREF",
187-
"Zilla"
188-
]
82+
"Zilla",
83+
"Appleby",
84+
"Cawley",
85+
"Brumme",
86+
"Merkle-Damgård",
87+
"Lemire",
88+
"copydoc",
89+
"Needleman",
90+
"Wunsch",
91+
"Wagner",
92+
"Fisher",
93+
],
94+
"editor.formatOnSave": true,
95+
"editor.rulers": [
96+
120
97+
],
98+
"files.associations": {
99+
"*.tcc": "cpp",
100+
"__bit_reference": "cpp",
101+
"__bits": "cpp",
102+
"__config": "cpp",
103+
"__debug": "cpp",
104+
"__errc": "cpp",
105+
"__functional_base": "cpp",
106+
"__hash_table": "cpp",
107+
"__locale": "cpp",
108+
"__memory": "c",
109+
"__mutex_base": "cpp",
110+
"__node_handle": "cpp",
111+
"__nullptr": "cpp",
112+
"__split_buffer": "cpp",
113+
"__string": "cpp",
114+
"__threading_support": "cpp",
115+
"__tree": "cpp",
116+
"__tuple": "cpp",
117+
"__verbose_abort": "cpp",
118+
"algorithm": "cpp",
119+
"any": "cpp",
120+
"array": "cpp",
121+
"atomic": "cpp",
122+
"bit": "cpp",
123+
"bitset": "cpp",
124+
"cctype": "cpp",
125+
"charconv": "c",
126+
"chrono": "cpp",
127+
"cinttypes": "cpp",
128+
"clocale": "cpp",
129+
"cmath": "cpp",
130+
"codecvt": "cpp",
131+
"compare": "cpp",
132+
"complex": "cpp",
133+
"concepts": "cpp",
134+
"condition_variable": "cpp",
135+
"csignal": "cpp",
136+
"cstdarg": "cpp",
137+
"cstddef": "cpp",
138+
"cstdint": "cpp",
139+
"cstdio": "cpp",
140+
"cstdlib": "cpp",
141+
"cstring": "cpp",
142+
"ctime": "cpp",
143+
"cwchar": "cpp",
144+
"cwctype": "cpp",
145+
"deque": "cpp",
146+
"exception": "cpp",
147+
"filesystem": "cpp",
148+
"format": "cpp",
149+
"forward_list": "cpp",
150+
"fstream": "cpp",
151+
"functional": "cpp",
152+
"future": "cpp",
153+
"initializer_list": "cpp",
154+
"iomanip": "cpp",
155+
"ios": "cpp",
156+
"iosfwd": "cpp",
157+
"iostream": "cpp",
158+
"istream": "cpp",
159+
"iterator": "cpp",
160+
"limits": "cpp",
161+
"list": "cpp",
162+
"locale": "cpp",
163+
"map": "cpp",
164+
"memory": "cpp",
165+
"memory_resource": "cpp",
166+
"mutex": "cpp",
167+
"new": "cpp",
168+
"numbers": "cpp",
169+
"numeric": "cpp",
170+
"optional": "cpp",
171+
"ostream": "cpp",
172+
"queue": "cpp",
173+
"random": "cpp",
174+
"ratio": "cpp",
175+
"semaphore": "cpp",
176+
"set": "cpp",
177+
"shared_mutex": "cpp",
178+
"source_location": "cpp",
179+
"span": "cpp",
180+
"sstream": "cpp",
181+
"stack": "cpp",
182+
"stdexcept": "cpp",
183+
"stop_token": "cpp",
184+
"streambuf": "cpp",
185+
"string": "cpp",
186+
"string_view": "cpp",
187+
"stringzilla.h": "c",
188+
"strstream": "cpp",
189+
"system_error": "cpp",
190+
"thread": "cpp",
191+
"tuple": "cpp",
192+
"type_traits": "cpp",
193+
"typeindex": "cpp",
194+
"typeinfo": "cpp",
195+
"unordered_map": "cpp",
196+
"unordered_set": "cpp",
197+
"utility": "cpp",
198+
"variant": "cpp",
199+
"vector": "cpp"
200+
},
201+
"python.pythonPath": "~/miniconda3/bin/python"
189202
}

include/stringzilla/stringzilla.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@
129129

130130
/**
131131
* @brief A misaligned load is, for example, an attempt to fetch eight consecutive bytes from an address
132-
* that is not divisble by eight.
132+
* that is not divisible by eight.
133133
*
134134
* Most platforms support it, but there is no industry standard way to check for those.
135135
* This value will mostly affect the performance of the serial (SWAR) backend.
@@ -142,8 +142,8 @@
142142
* @brief Cache-line width, that will affect the execution of some algorithms,
143143
* like equality checks and relative order computing.
144144
*/
145-
#ifndef SZ_CACHE_LINE_WIDRTH
146-
#define SZ_CACHE_LINE_WIDRTH (64)
145+
#ifndef SZ_CACHE_LINE_WIDTH
146+
#define SZ_CACHE_LINE_WIDTH (64)
147147
#endif
148148

149149
/*
@@ -351,7 +351,7 @@ typedef sz_ordering_t (*sz_order_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t);
351351
* https://github.com/Cyan4973/xxHash
352352
*
353353
* Neither of those functions are cryptographic, unlike MD5, SHA, and BLAKE algorithms.
354-
* Most of those are based on the MerkleDamgård construction, and aren't resistant to
354+
* Most of those are based on the Merkle-Damgård construction, and aren't resistant to
355355
* the length-extension attacks. The current state of the art might be the BLAKE3 algorithm.
356356
* It's resistant to a broad range of attacks, can process 2 bytes per CPU cycle, and comes
357357
* with a very optimized official implementation for C and Rust. It has the same 128-bit
@@ -511,7 +511,7 @@ SZ_PUBLIC sz_cptr_t sz_find_last_byte_avx512(sz_cptr_t haystack, sz_size_t h_len
511511

512512
/**
513513
* @brief Locates first matching substring.
514-
* Equivalient to `memmem(haystack, h_length, needle, n_length)` in LibC.
514+
* Equivalent to `memmem(haystack, h_length, needle, n_length)` in LibC.
515515
* Similar to `strstr(haystack, needle)` in LibC, but requires known length.
516516
*
517517
* @param haystack Haystack - the string to search in.
@@ -591,8 +591,8 @@ SZ_PUBLIC sz_cptr_t sz_find_last_bounded_regex(sz_cptr_t haystack, sz_size_t h_l
591591
#pragma region String Similarity Measures
592592

593593
/**
594-
* @brief Computes Levenshtein edit-distance between two strings using the Wagner Ficher algorithm.
595-
* Similar to the NeedlemanWunsch algorithm. Often used in fuzzy string matching.
594+
* @brief Computes Levenshtein edit-distance between two strings using the Wagner-Fischer algorithm.
595+
* Similar to the Needleman-Wunsch algorithm. Often used in fuzzy string matching.
596596
*
597597
* @param a First string to compare.
598598
* @param a_length Number of bytes in the first string.
@@ -628,7 +628,7 @@ SZ_PUBLIC sz_size_t sz_alignment_score_memory_needed(sz_size_t a_length, sz_size
628628
*
629629
* This function is equivalent to the default Levenshtein distance implementation with the ::gap parameter set
630630
* to one, and the ::subs matrix formed of all ones except for the main diagonal, which is zeros.
631-
* Unlike the default Levenshtein implementaion, this can't be bounded, as the substitution costs can be both positive
631+
* Unlike the default Levenshtein implementation, this can't be bounded, as the substitution costs can be both positive
632632
* and negative, meaning that the distance isn't monotonically growing as we go through the strings.
633633
*
634634
* @param a First string to compare.
@@ -1494,7 +1494,7 @@ SZ_INTERNAL sz_size_t _sz_levenshtein_serial_upto256bytes( //
14941494
sz_size_t bound, sz_memory_allocator_t const *alloc) {
14951495

14961496
// When dealing with short strings, we won't need to allocate memory on heap,
1497-
// as everythin would easily fit on the stack. Let's just make sure that
1497+
// as everything would easily fit on the stack. Let's just make sure that
14981498
// we use the amount proportional to the number of elements in the shorter string,
14991499
// not the larger.
15001500
if (b_length > a_length) return _sz_levenshtein_serial_upto256bytes(b, b_length, a, a_length, bound, alloc);
@@ -2065,14 +2065,14 @@ typedef union sz_u512_vec_t {
20652065
SZ_INTERNAL __mmask64 sz_u64_clamp_mask_until(sz_size_t n) {
20662066
// The simplest approach to compute this if we know that `n` is below or equal to 64:
20672067
// return (1ull << n) - 1;
2068-
// A slighly more complex approach, if we don't know that `n` is under 64:
2068+
// A slightly more complex approach, if we don't know that `n` is under 64:
20692069
return _bzhi_u64(0xFFFFFFFFFFFFFFFF, n < 64 ? n : 64);
20702070
}
20712071

20722072
SZ_INTERNAL __mmask64 sz_u64_mask_until(sz_size_t n) {
20732073
// The simplest approach to compute this if we know that `n` is below or equal to 64:
20742074
// return (1ull << n) - 1;
2075-
// A slighly more complex approach, if we don't know that `n` is under 64:
2075+
// A slightly more complex approach, if we don't know that `n` is under 64:
20762076
return _bzhi_u64(0xFFFFFFFFFFFFFFFF, n);
20772077
}
20782078

@@ -2442,15 +2442,15 @@ SZ_PUBLIC sz_cptr_t sz_find_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n,
24422442
(sz_find_t)sz_find_2byte_avx512,
24432443
(sz_find_t)sz_find_3byte_avx512,
24442444
(sz_find_t)sz_find_4byte_avx512,
2445-
// For longer needles we use a Two-Way heurstic with a follow-up check in-between.
2445+
// For longer needles we use a Two-Way heuristic with a follow-up check in-between.
24462446
(sz_find_t)sz_find_under66byte_avx512,
24472447
(sz_find_t)sz_find_over66byte_avx512,
24482448
};
24492449

24502450
return backends[
24512451
// For very short strings brute-force SWAR makes sense.
24522452
(n_length > 1) + (n_length > 2) + (n_length > 3) +
2453-
// For longer needles we use a Two-Way heurstic with a follow-up check in-between.
2453+
// For longer needles we use a Two-Way heuristic with a follow-up check in-between.
24542454
(n_length > 4) + (n_length > 66)](h, h_length, n, n_length);
24552455
}
24562456

@@ -2592,15 +2592,15 @@ SZ_PUBLIC sz_cptr_t sz_find_last_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr
25922592
sz_find_t backends[] = {
25932593
// For very short strings brute-force SWAR makes sense.
25942594
(sz_find_t)sz_find_last_byte_avx512,
2595-
// For longer needles we use a Two-Way heurstic with a follow-up check in-between.
2595+
// For longer needles we use a Two-Way heuristic with a follow-up check in-between.
25962596
(sz_find_t)sz_find_last_under66byte_avx512,
25972597
(sz_find_t)sz_find_last_over66byte_avx512,
25982598
};
25992599

26002600
return backends[
26012601
// For very short strings brute-force SWAR makes sense.
26022602
0 +
2603-
// For longer needles we use a Two-Way heurstic with a follow-up check in-between.
2603+
// For longer needles we use a Two-Way heuristic with a follow-up check in-between.
26042604
(n_length > 1) + (n_length > 66)](h, h_length, n, n_length);
26052605
}
26062606

0 commit comments

Comments
 (0)