Skip to content

Commit

Permalink
Add: Benchmarks notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Oct 10, 2023
1 parent c8a5b14 commit bcaf791
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 35 deletions.
26 changes: 6 additions & 20 deletions scripts/bench.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"152 ms ± 3.24 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
"152 ms ± 2.43 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
]
}
],
Expand All @@ -106,7 +106,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"38.1 ms ± 312 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
"37.7 ms ± 341 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
]
}
],
Expand All @@ -124,8 +124,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 7.28 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"186 ns ± 41.1 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
"The slowest run took 8.67 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"182 ns ± 35 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
]
}
],
Expand All @@ -143,29 +143,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 120.95 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"99.6 ns ± 155 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
"The slowest run took 40.69 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"90 ns ± 53.2 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit -n 1 -r 1000\n",
"sz_str.find(pattern)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
28 changes: 13 additions & 15 deletions stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ extern "C" {
#endif

/**
* @brief Analogous to `size_t` and `std::size_t`, unsigned integer, identical to pointer size.
* @brief Analogous to `size_t` and `std::size_t`, unsigned integer, identical to pointer size.
* 64-bit on most platforms where pointers are 64-bit.
* 32-bit on platforms where pointers are 32-bit.
*/
Expand Down Expand Up @@ -490,23 +490,21 @@ inline static sz_string_start_t sz_find_substring_avx2(sz_string_start_t const h
int matches3 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(texts3, anomalies));

if (matches0 | matches1 | matches2 | matches3) {
int matches = //
(matches0 & 0x1111'1111u) | //
(matches1 & 0x2222'2222u) | //
(matches2 & 0x4444'4444u) | //
(matches3 & 0x8888'8888u);
size_t first_match_offset = _tzcnt_u32(matches);
int matches = //
(matches0 & 0x11111111u) | //
(matches1 & 0x22222222u) | //
(matches2 & 0x44444444u) | //
(matches3 & 0x88888888u);
sz_size_t first_match_offset = _tzcnt_u32(matches);
if (needle_length > 4) {
if (sz_equal(text + first_match_offset + 4, needle + 4, needle_length - 4))
if (sz_equal(text + first_match_offset + 4, needle + 4, needle_length - 4)) {
return text + first_match_offset;
else
text += first_match_offset + 1;
}
else { text += first_match_offset + 1; }
}
else
return text + first_match_offset;
else { return text + first_match_offset; }
}
else
text += 32;
else { text += 32; }
}

// Don't forget the last (up to 35) characters.
Expand Down Expand Up @@ -566,7 +564,7 @@ inline static sz_string_start_t sz_find_substring_neon(sz_string_start_t const h
(vget_lane_u16(matches_u16x4, 3) << 12);

// Find the first match
size_t first_match_offset = __builtin_ctz(matches_u16);
sz_size_t first_match_offset = __builtin_ctz(matches_u16);
if (needle_length > 4) {
if (sz_equal(text + first_match_offset + 4, needle + 4, needle_length - 4))
return text + first_match_offset;
Expand Down

0 comments on commit bcaf791

Please sign in to comment.