Skip to content

Commit

Permalink
Add: Checksums in Python
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Dec 1, 2024
1 parent 9bec0eb commit 1b77de9
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
35 changes: 34 additions & 1 deletion python/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,40 @@ static PyObject *Str_like_hash(PyObject *self, PyObject *args, PyObject *kwargs)
}

sz_u64_t result = sz_hash(text.start, text.length);
return PyLong_FromSize_t((size_t)result);
return PyLong_FromUnsignedLongLong((unsigned long long)result);
}

// Help text for the `checksum` binding implemented by `Str_like_checksum`.
// The argument description previously said "to hash", a leftover from the
// neighbouring hash docstring; this function sums byte values instead.
static char const doc_like_checksum[] = //
    "Compute the checksum of individual byte values in a string.\n"
    "\n"
    "This function can be called as a method on a Str object or as a standalone function.\n"
    "Args:\n"
    " text (Str or str or bytes): The string to checksum.\n"
    "Returns:\n"
    " int: The checksum of individual byte values in a string.\n"
    "Raises:\n"
    " TypeError: If the argument is not string-like or incorrect number of arguments is provided.";

/**
 *  Python entry point computing `sz_checksum` over a string-like object.
 *
 *  Works both as a method (when `self` is an instance of `StrType`, the text is `self`
 *  and no positional arguments are allowed) and as a free function (the text is the
 *  single positional argument). Keyword arguments are never accepted.
 *
 *  @param self    Bound object; treated as the text only if it passes the `StrType` check.
 *  @param args    Tuple of positional arguments.
 *  @param kwargs  Keyword arguments dictionary; must be NULL.
 *  @return New Python integer holding the unsigned 64-bit checksum, or NULL with an exception set.
 */
static PyObject *Str_like_checksum(PyObject *self, PyObject *args, PyObject *kwargs) {
    // Decide where the text comes from: `self` for method calls, `args[0]` otherwise.
    int is_member = self != NULL && PyObject_TypeCheck(self, &StrType);
    Py_ssize_t nargs = PyTuple_Size(args);

    // Exactly one text operand overall: zero positional arguments for a method call,
    // one for a free-function call. The previous upper bound (`!is_member + 1`) let an
    // extra positional argument through and silently ignored it.
    if (nargs != !is_member || kwargs) {
        PyErr_SetString(PyExc_TypeError, "checksum() expects exactly one positional argument");
        return NULL;
    }

    PyObject *text_obj = is_member ? self : PyTuple_GET_ITEM(args, 0);
    sz_string_view_t text;

    // Validate and convert `text`; a falsy result from the exporter is reported as an error.
    if (!export_string_like(text_obj, &text.start, &text.length)) {
        wrap_current_exception("The text argument must be string-like");
        return NULL;
    }

    // Widen explicitly to `unsigned long long` so the full 64-bit value is preserved
    // regardless of the platform's `size_t` width.
    sz_u64_t result = sz_checksum(text.start, text.length);
    return PyLong_FromUnsignedLongLong((unsigned long long)result);
}

static char const doc_like_equal[] = //
Expand Down
12 changes: 11 additions & 1 deletion scripts/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,14 +767,24 @@ def test_translations(length: int):


@pytest.mark.repeat(3)
@pytest.mark.parametrize("length", list(range(0, 300)) + [1024, 4096, 100000])
@pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
def test_translations_random(length: int):
    """Check `sz.translate` against `baseline_translate` on random inputs.

    A random string of the given length is translated through a random
    256-entry ``uint8`` look-up table, and the result must match the baseline.
    The lengths cover the empty string, every size below 300, and a few
    larger inputs.
    """
    # Only one parametrization of "length" is kept: stacking a second
    # `parametrize` for the same argument name makes pytest fail at collection.
    body = get_random_string(length=length)
    lut = np.random.randint(0, 256, size=256, dtype=np.uint8)
    assert sz.translate(body, memoryview(lut)) == baseline_translate(body, lut)


@pytest.mark.repeat(3)
@pytest.mark.parametrize("length", list(range(0, 300)) + [1024, 4096, 100000])
def test_checksums_random(length: int):
    """Check `sz.checksum` against a pure-Python sum of byte values.

    The lengths cover the empty string, every size below 300, and a few
    larger inputs.
    """

    def sum_bytes(body: str) -> int:
        # The checksum is defined over byte values, not code points, so the
        # baseline sums the encoded bytes. For ASCII input this equals the
        # sum of `ord(c)`.
        # NOTE(review): assumes `sz` views `str` as UTF-8 — confirm.
        return sum(body.encode("utf-8"))

    body = get_random_string(length=length)
    assert sum_bytes(body) == sz.checksum(body)


@pytest.mark.parametrize("list_length", [10, 20, 30, 40, 50])
@pytest.mark.parametrize("part_length", [5, 10])
@pytest.mark.parametrize("variability", [2, 3])
Expand Down

0 comments on commit 1b77de9

Please sign in to comment.