Skip to content

Commit

Permalink
Merge pull request #165 from ashvardanian/main-dev
Browse files Browse the repository at this point in the history
Improve: Signed args for `front`/`back`
  • Loading branch information
ashvardanian authored Aug 22, 2024
2 parents a4cd08e + d9c3a43 commit 1b2b9ef
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ COMMON_FLAGS="-DSTRINGZILLA_BUILD_TEST=1 -DSTRINGZILLA_BUILD_BENCHMARK=1 -DSTRIN
# Compiler specific settings
case "$COMPILER" in
"GCC")
COMPILER_FLAGS="-DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12"
# GCC ships separate drivers: `gcc` for C sources and `g++` for C++ sources.
# Each must go to its own CMake variable; passing `CMAKE_CXX_COMPILER` twice
# leaves the C compiler unset and lets the second value override the first.
COMPILER_FLAGS="-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++"
;;
"LLVM")
COMPILER_FLAGS="-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
Expand Down
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"tparam",
"TPFLAGS",
"unigram",
"unpoison",
"usecases",
"Vardanian",
"vectorcallfunc",
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,9 @@ When it's enabled, the _~~subjectively~~_ risky overloads from the Standard will
using str = sz::string;
str("a:b").front(1) == "a"; // no checks, unlike `substr`
str("a:b").front(2) == "a:"; // take first 2 characters
str("a:b").back(-1) == "b"; // accepting negative indices
str("a:b").back(-2) == ":b"; // similar to Python's `"a:b"[-2:]`
str("a:b").sub(1, -1) == ":"; // similar to Python's `"a:b"[1:-1]`
str("a:b").sub(-2, -1) == ":"; // similar to Python's `"a:b"[-2:-1]`
str("a:b").sub(-2, 1) == ""; // similar to Python's `"a:b"[-2:1]`
Expand Down
34 changes: 19 additions & 15 deletions include/stringzilla/stringzilla.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1242,27 +1242,31 @@ class basic_string_slice {
* @warning The behavior is @b undefined if the position is beyond bounds.
*/
reference sat(difference_type signed_offset) const noexcept {
size_type pos = (signed_offset < 0) ? size() + signed_offset : signed_offset;
size_type pos = static_cast<size_type>(signed_offset < 0 ? size() + signed_offset : signed_offset);
assert(pos < size() && "string_slice::sat(i) out of bounds");
return start_[pos];
}

/**
* @brief The opposite operation to `remove_prefix`, that does no bounds checking.
* @warning The behavior is @b undefined if `n > size()`.
* @brief The slice that would be dropped by `remove_prefix`, that accepts signed arguments
* and does no bounds checking. Equivalent to Python's `"abc"[:2]` and `"abc"[:-1]`.
* @warning The behavior is @b undefined if `signed_offset > size() || signed_offset < -size()`.
*/
string_slice front(size_type n) const noexcept {
assert(n <= size() && "string_slice::front(n) out of bounds");
return {start_, n};
string_slice front(difference_type signed_offset) const noexcept {
size_type pos = static_cast<size_type>(signed_offset < 0 ? size() + signed_offset : signed_offset);
assert(pos <= size() && "string_slice::front(signed_offset) out of bounds");
return {start_, pos};
}

/**
* @brief The opposite operation to `remove_prefix`, that does no bounds checking.
* @warning The behavior is @b undefined if `n > size()`.
* @brief The suffix starting at `signed_offset`, which is counted from the end if negative.
* Accepts signed arguments and does no bounds checking.
* Equivalent to Python's `"abc"[2:]` and `"abc"[-1:]`.
* @warning The behavior is @b undefined if `signed_offset > size() || signed_offset < -size()`.
*/
string_slice back(size_type n) const noexcept {
assert(n <= size() && "string_slice::back(n) out of bounds");
return {start_ + length_ - n, n};
string_slice back(difference_type signed_offset) const noexcept {
size_type pos = static_cast<size_type>(signed_offset < 0 ? size() + signed_offset : signed_offset);
assert(pos <= size() && "string_slice::back(signed_offset) out of bounds");
return {start_ + pos, length_ - pos};
}

/**
Expand Down Expand Up @@ -2186,15 +2190,15 @@ class basic_string {
* @brief The prefix ending at the signed offset `n`, without bounds checking.
* Equivalent to Python's `"abc"[:2]` and `"abc"[:-1]`.
* @warning The behavior is @b undefined if `n` is outside of the `[-size(), size()]` range.
*/
string_view front(size_type n) const noexcept { return view().front(n); }
string_span front(size_type n) noexcept { return span().front(n); }
string_view front(difference_type n) const noexcept { return view().front(n); }
string_span front(difference_type n) noexcept { return span().front(n); }

/**
* @brief The suffix starting at the signed offset `n`, without bounds checking.
* Equivalent to Python's `"abc"[2:]` and `"abc"[-1:]`.
* @warning The behavior is @b undefined if `n` is outside of the `[-size(), size()]` range.
*/
string_view back(size_type n) const noexcept { return view().back(n); }
string_span back(size_type n) noexcept { return span().back(n); }
string_view back(difference_type n) const noexcept { return view().back(n); }
string_span back(difference_type n) noexcept { return span().back(n); }

/**
* @brief Equivalent to Python's `"abc"[-3:-1]`. Exception-safe, unlike STL's `substr`.
Expand Down
31 changes: 26 additions & 5 deletions scripts/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,11 @@ static void test_api_readonly() {
assert(str("hello", 4) == "hell"); // Construct from substring

// Element access.
assert(str("test")[0] == 't');
assert(str("test").at(1) == 'e');
assert(str("rest")[0] == 'r');
assert(str("rest").at(1) == 'e');
assert(*str("rest").data() == 'r');
assert(str("front").front() == 'f');
assert(str("back").back() == 'k');
assert(*str("data").data() == 'd');

// Iterators.
assert(*str("begin").begin() == 'b' && *str("cbegin").cbegin() == 'c');
Expand Down Expand Up @@ -567,9 +567,30 @@ template <typename string_type>
static void test_api_readonly_extensions() {
using str = string_type;

// Signed offset lokups and slices.
// Signed offset lookups and slices.
assert(str("hello").sat(0) == 'h');
assert(str("hello").sat(-1) == 'o');
assert(str("rest").sat(1) == 'e');
assert(str("rest").sat(-1) == 't');
assert(str("rest").sat(-4) == 'r');

assert(str("front").front() == 'f');
assert(str("front").front(1) == "f");
assert(str("front").front(2) == "fr");
assert(str("front").front(2) == "fr");
assert(str("front").front(-2) == "fro");
assert(str("front").front(0) == "");
assert(str("front").front(5) == "front");
assert(str("front").front(-5) == "");

assert(str("back").back() == 'k');
assert(str("back").back(1) == "ack");
assert(str("back").back(2) == "ck");
assert(str("back").back(-1) == "k");
assert(str("back").back(-2) == "ck");
assert(str("back").back(-4) == "back");
assert(str("back").back(4) == "");

assert(str("hello").sub(1) == "ello");
assert(str("hello").sub(-1) == "o");
assert(str("hello").sub(1, 2) == "e");
Expand Down Expand Up @@ -1022,7 +1043,7 @@ void test_search_with_misaligned_repetitions(std::string_view haystack_pattern,
std::size_t misalignment) {
constexpr std::size_t max_repeats = 128;

// Allocate a buffer to store the haystack with enough padding to misalign it.
// Allocate a buffer to store the haystack with enough padding to mis-align it.
std::size_t haystack_buffer_length = max_repeats * haystack_pattern.size() + 2 * SZ_CACHE_LINE_WIDTH;
std::vector<char> haystack_buffer(haystack_buffer_length, 'x');
char *haystack = haystack_buffer.data();
Expand Down

0 comments on commit 1b2b9ef

Please sign in to comment.