diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index e9abbc34..0213f679 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -262,7 +262,43 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + - uses: ilammy/msvc-dev-cmd@v1 + - name: Build C/C++ + shell: cmd + run: | + cmake -GNinja -B build_artifacts ^ + -DCMAKE_BUILD_TYPE=RelWithDebInfo ^ + -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ^ + -DSTRINGZILLA_BUILD_BENCHMARK=1 ^ + -DSTRINGZILLA_BUILD_TEST=1 + + cmake --build build_artifacts --config RelWithDebInfo > build_artifacts/logs.txt 2>&1 || ( + echo "Compilation failed. Here are the logs:" + type build_artifacts\logs.txt + echo "The original compilation commands:" + type build_artifacts\compile_commands.json + echo: + echo "CPU Features:" + wmic cpu list /format:list + exit 1 + ) + - name: Test C++ + run: .\build_artifacts\stringzilla_test_cpp20.exe + - name: Test on Real World Data + run: | + .\build_artifacts\stringzilla_bench_search.exe ${DATASET_PATH} # for substring search + .\build_artifacts\stringzilla_bench_token.exe ${DATASET_PATH} # for hashing, equality comparisons, etc. + .\build_artifacts\stringzilla_bench_similarity.exe ${DATASET_PATH} # for edit distances and alignment scores + .\build_artifacts\stringzilla_bench_sort.exe ${DATASET_PATH} # for sorting arrays of strings + .\build_artifacts\stringzilla_bench_container.exe ${DATASET_PATH} # for STL containers with string keys + env: + DATASET_PATH: ./README.md + # Don't overload GitHub with our benchmarks. + # The results in such an unstable environment will be meaningless anyway. + if: 0 + + # Python - name: Set up Python ${{ env.PYTHON_VERSION }} uses: actions/setup-python@v5 with: diff --git a/.gitignore b/.gitignore index 412c78b6..6fd5cd1b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ build_debug/ build_release/ +build_artifacts* # Yes, everyone loves keeping this file in the history. 
# But with a very minimalistic binding and just a couple of dependencies @@ -27,6 +28,7 @@ CMakeFiles *.pyd .venv/* node_modules/ +.vs/ # Recommended datasets leipzig1M.txt diff --git a/.vscode/settings.json b/.vscode/settings.json index 63d0f17a..e7194e7a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -252,7 +252,8 @@ "xmemory": "cpp", "xtr1common": "cpp", "xtree": "cpp", - "xutility": "cpp" + "xutility": "cpp", + "errno.h": "c" }, "python.pythonPath": "~/miniconda3/bin/python" } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a52a1ef..153ed226 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,9 +239,15 @@ if(${STRINGZILLA_BUILD_TEST}) # compile multiple backends: disabling all SIMD, enabling only AVX2, only AVX-512, only Arm Neon. if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|amd64") # x86 specific backends - define_launcher(stringzilla_test_cpp20_x86_serial scripts/test.cpp 20 "ivybridge") - define_launcher(stringzilla_test_cpp20_x86_avx2 scripts/test.cpp 20 "haswell") - define_launcher(stringzilla_test_cpp20_x86_avx512 scripts/test.cpp 20 "sapphirerapids") + if (MSVC) + define_launcher(stringzilla_test_cpp20_x86_serial scripts/test.cpp 20 "AVX") + define_launcher(stringzilla_test_cpp20_x86_avx2 scripts/test.cpp 20 "AVX2") + define_launcher(stringzilla_test_cpp20_x86_avx512 scripts/test.cpp 20 "AVX512") + else() + define_launcher(stringzilla_test_cpp20_x86_serial scripts/test.cpp 20 "ivybridge") + define_launcher(stringzilla_test_cpp20_x86_avx2 scripts/test.cpp 20 "haswell") + define_launcher(stringzilla_test_cpp20_x86_avx512 scripts/test.cpp 20 "sapphirerapids") + endif() elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") # ARM specific backends define_launcher(stringzilla_test_cpp20_arm_serial scripts/test.cpp 20 "armv8-a") diff --git a/README.md b/README.md index e6592fff..4bafcd1e 100644 --- a/README.md +++ b/README.md @@ -309,14 +309,14 @@ Consider contributing, if you need a feature that's 
not yet implemented. ### Basic Usage -If you've ever used the Python `str` or `bytes` class, you'll know what to expect. +If you've ever used the Python `str`, `bytes`, `bytearray`, `memoryview` class, you'll know what to expect. StringZilla's `Str` class is a hybrid of those two, providing `str`-like interface to byte-arrays. ```python from stringzilla import Str, File -text_from_str = Str('some-string') -text_from_file = Str(File('some-file.txt')) +text_from_str = Str('some-string') # no copies, just a view +text_from_file = Str(File('some-file.txt')) # memory-mapped file ``` The `File` class memory-maps a file from persistent memory without loading its copy into RAM. @@ -328,18 +328,23 @@ A standard dataset pre-processing use case would be to map a sizeable textual da - Length: `len(text) -> int` - Indexing: `text[42] -> str` - Slicing: `text[42:46] -> Str` -- String conversion: `str(text) -> str` - Substring check: `'substring' in text -> bool` - Hashing: `hash(text) -> int` +- String conversion: `str(text) -> str` ### Advanced Operations -- `text.contains('substring', start=0, end=9223372036854775807) -> bool` -- `text.find('substring', start=0, end=9223372036854775807) -> int` -- `text.count('substring', start=0, end=9223372036854775807, allowoverlap=False) -> int` -- `text.split(separator=' ', maxsplit=9223372036854775807, keepseparator=False) -> Strs` -- `text.rsplit(separator=' ', maxsplit=9223372036854775807, keepseparator=False) -> Strs` -- `text.splitlines(keeplinebreaks=False, maxsplit=9223372036854775807) -> Strs` +```py +import sys + +x: bool = text.contains('substring', start=0, end=sys.maxsize) +x: int = text.find('substring', start=0, end=sys.maxsize) +x: int = text.count('substring', start=0, end=sys.maxsize, allowoverlap=False) +x: str = text.decode(encoding='utf-8', errors='strict') +x: Strs = text.split(separator=' ', maxsplit=sys.maxsize, keepseparator=False) +x: Strs = text.rsplit(separator=' ', maxsplit=sys.maxsize, keepseparator=False) +x: 
Strs = text.splitlines(keeplinebreaks=False, maxsplit=sys.maxsize) +``` It's important to note, that the last function behavior is slightly different from Python's `str.splitlines`. The [native version][faq-splitlines] matches `\n`, `\r`, `\v` or `\x0b`, `\f` or `\x0c`, `\x1c`, `\x1d`, `\x1e`, `\x85`, `\r\n`, `\u2028`, `\u2029`, including 3x two-bytes-long runes. @@ -353,15 +358,14 @@ Python strings don't natively support character set operations. This forces people to use regular expressions, which are slow and hard to read. To avoid the need for `re.finditer`, StringZilla provides the following interfaces: -- `text.find_first_of('chars', start=0, end=9223372036854775807) -> int` -- `text.find_last_of('chars', start=0, end=9223372036854775807) -> int` -- `text.find_first_not_of('chars', start=0, end=9223372036854775807) -> int` -- `text.find_last_not_of('chars', start=0, end=9223372036854775807) -> int` - -Similarly, for splitting operations: - -- `text.split_charset(separator='chars', maxsplit=9223372036854775807, keepseparator=False) -> Strs` -- `text.rsplit_charset(separator='chars', maxsplit=9223372036854775807, keepseparator=False) -> Strs` +```py +x: int = text.find_first_of('chars', start=0, end=sys.maxsize) +x: int = text.find_last_of('chars', start=0, end=sys.maxsize) +x: int = text.find_first_not_of('chars', start=0, end=sys.maxsize) +x: int = text.find_last_not_of('chars', start=0, end=sys.maxsize) +x: Strs = text.split_charset(separator='chars', maxsplit=sys.maxsize, keepseparator=False) +x: Strs = text.rsplit_charset(separator='chars', maxsplit=sys.maxsize, keepseparator=False) +``` ### Collection-Level Operations @@ -420,9 +424,9 @@ Assuming StringZilla CPython bindings are implemented [without any intermediate ```py import stringzilla as sz -contains: bool = sz.contains("haystack", "needle", start=0, end=9223372036854775807) -offset: int = sz.find("haystack", "needle", start=0, end=9223372036854775807) -count: int = sz.count("haystack", "needle", 
start=0, end=9223372036854775807, allowoverlap=False) +contains: bool = sz.contains("haystack", "needle", start=0, end=sys.maxsize) +offset: int = sz.find("haystack", "needle", start=0, end=sys.maxsize) +count: int = sz.count("haystack", "needle", start=0, end=sys.maxsize, allowoverlap=False) ``` ### Edit Distances @@ -515,6 +519,20 @@ next_doc_offset = next_doc.offset_within(web_archieve) web_archieve.write_to("next_doc.html") ``` +#### PyArrow + +A `Str` is easy to cast to [PyArrow](https://arrow.apache.org/docs/python/arrays.html#string-and-binary-types) buffers. + +```py +from pyarrow import foreign_buffer +from stringzilla import Str + +original = "hello" +view = Str(original) +arrow = foreign_buffer(view.address, view.nbytes, view) +``` + +That means you can convert `Str` to `pyarrow.Buffer` and `Strs` to `pyarrow.Array` without extra copies. ## Quick Start: C/C++ 🛠️ @@ -1369,13 +1387,16 @@ Another one is the [Fibonacci hash trick](https://probablydance.com/2018/06/16/f ### Unicode, UTF-8, and Wide Characters -StringZilla does not __yet__ implement any Unicode-specific algorithms. -The content is addressed at byte-level, and the string is assumed to be encoded in UTF-8 or extended ASCII. -Refer to [simdutf](https://github.com/simdutf/simdutf) for fast conversions and [icu](https://github.com/unicode-org/icu) for character metadata. +Most StringZilla operations are byte-level, so they work well with ASCII and UTF-8 content out of the box. +In some cases, like edit-distance computation, the result of byte-level evaluation and character-level evaluation may differ. +So StringZilla provides the following functions to work with Unicode: + +- `sz_edit_distance_utf8` - computes the Levenshtein distance between two UTF-8 strings. +- `sz_hamming_distance_utf8` - computes the Hamming distance between two UTF-8 strings. -This may introduce frictions, when binding to some programming languages.
-Namely, Java, JavaScript, Python 2, C#, and Objective-C use wide characters (`wchar`) - two byte long codes. +Java, JavaScript, Python 2, C#, and Objective-C, however, use wide characters (`wchar`) - two byte long codes, instead of the more reasonable fixed-length UTF32 or variable-length UTF8. This leads [to all kinds of offset-counting issues][wide-char-offsets] when facing four-byte long Unicode characters. +So consider transcoding with [simdutf](https://github.com/simdutf/simdutf), if you are coming from such environments. [wide-char-offsets]: https://josephg.com/blog/string-length-lies/ diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index fe55692a..ef066513 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -4507,10 +4507,10 @@ SZ_PUBLIC void sz_hashes_avx512(sz_cptr_t start, sz_size_t length, sz_size_t win chars_vec.zmm = _mm512_add_epi8(chars_vec.zmm, shift_vec.zmm); // ... and prefetch the next four characters into Level 2 or higher. - _mm_prefetch(text_fourth + 1, _MM_HINT_T1); - _mm_prefetch(text_third + 1, _MM_HINT_T1); - _mm_prefetch(text_second + 1, _MM_HINT_T1); - _mm_prefetch(text_first + 1, _MM_HINT_T1); + _mm_prefetch((sz_cptr_t)text_fourth + 1, _MM_HINT_T1); + _mm_prefetch((sz_cptr_t)text_third + 1, _MM_HINT_T1); + _mm_prefetch((sz_cptr_t)text_second + 1, _MM_HINT_T1); + _mm_prefetch((sz_cptr_t)text_first + 1, _MM_HINT_T1); // 3. Add the incoming characters. 
hash_vec.zmm = _mm512_add_epi64(hash_vec.zmm, chars_vec.zmm); diff --git a/include/stringzilla/stringzilla.hpp b/include/stringzilla/stringzilla.hpp index daf47d50..e64ebc44 100644 --- a/include/stringzilla/stringzilla.hpp +++ b/include/stringzilla/stringzilla.hpp @@ -458,8 +458,9 @@ class range_matches { return temp; } - bool operator!=(iterator const &other) const noexcept { return remaining_.begin() != other.remaining_.begin(); } - bool operator==(iterator const &other) const noexcept { return remaining_.begin() == other.remaining_.begin(); } + // Assumes both iterators point to the same underlying string. + bool operator!=(iterator const &other) const noexcept { return remaining_.data() != other.remaining_.data(); } + bool operator==(iterator const &other) const noexcept { return remaining_.data() == other.remaining_.data(); } bool operator!=(end_sentinel_type) const noexcept { return !remaining_.empty(); } bool operator==(end_sentinel_type) const noexcept { return remaining_.empty(); } }; @@ -550,8 +551,14 @@ class range_rmatches { return temp; } - bool operator!=(iterator const &other) const noexcept { return remaining_.end() != other.remaining_.end(); } - bool operator==(iterator const &other) const noexcept { return remaining_.end() == other.remaining_.end(); } + // Assumes both iterators point to the same underlying string. + // This has to be `.data() + .size()`, to be compatible with `std::string_view` on MSVC. 
+ bool operator!=(iterator const &other) const noexcept { + return remaining_.data() + remaining_.size() != other.remaining_.data() + other.remaining_.size(); + } + bool operator==(iterator const &other) const noexcept { + return remaining_.data() + remaining_.size() == other.remaining_.data() + other.remaining_.size(); + } bool operator!=(end_sentinel_type) const noexcept { return !remaining_.empty(); } bool operator==(end_sentinel_type) const noexcept { return remaining_.empty(); } }; diff --git a/python/lib.c b/python/lib.c index d4b74e84..75acfad6 100644 --- a/python/lib.c +++ b/python/lib.c @@ -35,6 +35,7 @@ typedef SSIZE_T ssize_t; #include // Core CPython interfaces +#include // `errno` #include // `fopen` #include // `rand`, `srand` #include // `memset`, `memcpy` @@ -78,7 +79,7 @@ typedef struct { * - Str() # Empty string * - Str("some-string") # Full-range slice of a Python `str` * - Str(File("some-path.txt")) # Full-range view of a persisted file - * - Str(File("some-path.txt"), from=0, to=sys.maxint) + * - Str(File("some-path.txt"), from=0, to=sys.maxsize) */ typedef struct { PyObject ob_base; @@ -441,9 +442,18 @@ static int File_init(File *self, PyObject *positional_args, PyObject *named_args if (!PyArg_ParseTuple(positional_args, "s", &path)) return -1; #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) + DWORD path_attributes = GetFileAttributes(path); + if (path_attributes == INVALID_FILE_ATTRIBUTES) { + PyErr_SetString(PyExc_OSError, "Couldn't get file attributes!"); + return -1; + } + if (path_attributes & FILE_ATTRIBUTE_DIRECTORY) { + PyErr_SetString(PyExc_ValueError, "The provided path is a directory, not a normal file!"); + return -1; + } self->file_handle = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); if (self->file_handle == INVALID_HANDLE_VALUE) { - PyErr_SetString(PyExc_RuntimeError, "Couldn't map the file!"); + PyErr_SetString(PyExc_OSError, "Couldn't map the 
file!"); return -1; } @@ -451,7 +461,7 @@ static int File_init(File *self, PyObject *positional_args, PyObject *named_args if (self->mapping_handle == 0) { CloseHandle(self->file_handle); self->file_handle = NULL; - PyErr_SetString(PyExc_RuntimeError, "Couldn't map the file!"); + PyErr_SetString(PyExc_OSError, "Couldn't map the file!"); return -1; } @@ -461,18 +471,31 @@ static int File_init(File *self, PyObject *positional_args, PyObject *named_args self->mapping_handle = NULL; CloseHandle(self->file_handle); self->file_handle = NULL; - PyErr_SetString(PyExc_RuntimeError, "Couldn't map the file!"); + PyErr_SetString(PyExc_OSError, "Couldn't map the file!"); return -1; } self->start = file; self->length = GetFileSize(self->file_handle, 0); #else - struct stat sb; self->file_descriptor = open(path, O_RDONLY); + if (self->file_descriptor == -1) { + PyErr_Format(PyExc_OSError, "Couldn't open the file at '%s': %s", path, strerror(errno)); + return -1; + } + // No permissions are required on the file itself to get its properties from the existing descriptor.
+ // https://linux.die.net/man/2/fstat + struct stat sb; if (fstat(self->file_descriptor, &sb) != 0) { close(self->file_descriptor); self->file_descriptor = 0; - PyErr_SetString(PyExc_RuntimeError, "Can't retrieve file size!"); + PyErr_Format(PyExc_OSError, "Can't retrieve file size at '%s': %s", path, strerror(errno)); + return -1; + } + // Check if it's a regular file + if (!S_ISREG(sb.st_mode)) { + close(self->file_descriptor); + self->file_descriptor = 0; + PyErr_Format(PyExc_ValueError, "The provided path is not a normal file at '%s'", path); return -1; } size_t file_size = sb.st_size; @@ -480,7 +503,7 @@ static int File_init(File *self, PyObject *positional_args, PyObject *named_args if (map == MAP_FAILED) { close(self->file_descriptor); self->file_descriptor = 0; - PyErr_SetString(PyExc_RuntimeError, "Couldn't map the file!"); + PyErr_Format(PyExc_OSError, "Couldn't map the file at '%s': %s", path, strerror(errno)); return -1; } self->start = map; @@ -1162,6 +1185,48 @@ static PyObject *Strs_richcompare(PyObject *self, PyObject *other, int op) { } } +static PyObject *Str_decode(PyObject *self, PyObject *args, PyObject *kwargs) { + int is_member = self != NULL && PyObject_TypeCheck(self, &StrType); + Py_ssize_t nargs = PyTuple_Size(args); + if (nargs < !is_member || nargs > !is_member + 2) { + PyErr_Format(PyExc_TypeError, "Invalid number of arguments"); + return NULL; + } + + PyObject *text_obj = is_member ? self : PyTuple_GET_ITEM(args, 0); + PyObject *encoding_obj = nargs > !is_member + 0 ? PyTuple_GET_ITEM(args, !is_member + 0) : NULL; + PyObject *errors_obj = nargs > !is_member + 1 ? 
PyTuple_GET_ITEM(args, !is_member + 1) : NULL; + + if (kwargs) { + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(kwargs, &pos, &key, &value)) + if (PyUnicode_CompareWithASCIIString(key, "encoding") == 0) { encoding_obj = value; } + else if (PyUnicode_CompareWithASCIIString(key, "errors") == 0) { errors_obj = value; } + else if (PyErr_Format(PyExc_TypeError, "Got an unexpected keyword argument '%U'", key)) + return NULL; + } + + // Convert `encoding` and `errors` to `NULL` if they are `None` + if (encoding_obj == Py_None) encoding_obj = NULL; + if (errors_obj == Py_None) errors_obj = NULL; + + sz_string_view_t text, encoding, errors; + if ((!export_string_like(text_obj, &text.start, &text.length)) || + (encoding_obj && !export_string_like(encoding_obj, &encoding.start, &encoding.length)) || + (errors_obj && !export_string_like(errors_obj, &errors.start, &errors.length))) { + PyErr_Format(PyExc_TypeError, "text, encoding, and errors must be string-like"); + return NULL; + } + + if (encoding_obj == NULL) encoding = (sz_string_view_t) {"utf-8", 5}; + if (errors_obj == NULL) errors = (sz_string_view_t) {"strict", 6}; + + // Python docs: https://docs.python.org/3/library/stdtypes.html#bytes.decode + // CPython docs: https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_Decode + return PyUnicode_Decode(text.start, text.length, encoding.start, errors.start); +} + /** * @brief Saves a StringZilla string to disk. 
*/ @@ -2335,12 +2400,13 @@ static PyGetSetDef Str_getsetters[] = { #define SZ_METHOD_FLAGS METH_VARARGS | METH_KEYWORDS static PyMethodDef Str_methods[] = { - // Basic `str`-like functionality + // Basic `str`, `bytes`, and `bytearray`-like functionality {"contains", Str_contains, SZ_METHOD_FLAGS, "Check if a string contains a substring."}, {"count", Str_count, SZ_METHOD_FLAGS, "Count the occurrences of a substring."}, {"splitlines", Str_splitlines, SZ_METHOD_FLAGS, "Split a string by line breaks."}, {"startswith", Str_startswith, SZ_METHOD_FLAGS, "Check if a string starts with a given prefix."}, {"endswith", Str_endswith, SZ_METHOD_FLAGS, "Check if a string ends with a given suffix."}, + {"decode", Str_decode, SZ_METHOD_FLAGS, "Decode the bytes into `str` with a given encoding"}, // Bidirectional operations {"find", Str_find, SZ_METHOD_FLAGS, "Find the first occurrence of a substring."}, @@ -2888,12 +2954,13 @@ static void stringzilla_cleanup(PyObject *m) { } static PyMethodDef stringzilla_methods[] = { - // Basic `str`-like functionality + // Basic `str`, `bytes`, and `bytearray`-like functionality {"contains", Str_contains, SZ_METHOD_FLAGS, "Check if a string contains a substring."}, {"count", Str_count, SZ_METHOD_FLAGS, "Count the occurrences of a substring."}, {"splitlines", Str_splitlines, SZ_METHOD_FLAGS, "Split a string by line breaks."}, {"startswith", Str_startswith, SZ_METHOD_FLAGS, "Check if a string starts with a given prefix."}, {"endswith", Str_endswith, SZ_METHOD_FLAGS, "Check if a string ends with a given suffix."}, + {"decode", Str_decode, SZ_METHOD_FLAGS, "Decode the bytes into `str` with a given encoding"}, // Bidirectional operations {"find", Str_find, SZ_METHOD_FLAGS, "Find the first occurrence of a substring."}, diff --git a/scripts/test.cpp b/scripts/test.cpp index 80f43442..bc730d65 100644 --- a/scripts/test.cpp +++ b/scripts/test.cpp @@ -1,4 +1,10 @@ #undef NDEBUG // Enable all assertions + +// Enable assertions for iterators +#if 
!defined(_ITERATOR_DEBUG_LEVEL) || _ITERATOR_DEBUG_LEVEL == 0 +#define _ITERATOR_DEBUG_LEVEL 1 +#endif + #include // assertions // Overload the following with caution. diff --git a/scripts/test.py b/scripts/test.py index 2c180cbc..735fbfd2 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -378,9 +378,38 @@ def test_unit_globals(): assert sz.edit_distance("abababab", "aaaaaaaa", bound=2) == 2 -def test_unit_len(): - w = sz.Str("abcd") - assert 4 == len(w) +def test_string_lengths(): + assert 4 == len(sz.Str("abcd")) + assert 8 == len(sz.Str("αβγδ")) + + +@pytest.mark.parametrize( + "byte_string, encoding, expected", + [ + (b"hello world", "utf-8", "hello world"), + (b"\xf0\x9f\x98\x81", "utf-8", "😁"), # Emoji + (b"hello world", "ascii", "hello world"), + (b"\xf0hello world", "latin-1", "ðhello world"), + (b"", "utf-8", ""), # Empty string case + ], +) +def test_decoding_valid_strings(byte_string, encoding, expected): + assert byte_string.decode(encoding) == expected + assert sz.Str(byte_string).decode(encoding) == expected + + +@pytest.mark.parametrize( + "byte_string, encoding", + [ + (b"\xff", "utf-8"), # Invalid UTF-8 byte + (b"\x80hello", "ascii"), # Non-ASCII byte in ASCII string + ], +) +def test_decoding_exceptions(byte_string, encoding): + with pytest.raises(UnicodeDecodeError): + byte_string.decode(encoding) + with pytest.raises(UnicodeDecodeError): + sz.Str(byte_string).decode(encoding) def test_slice_of_split():