From 1b77de9e0051c199153a49eb210c9c65c7eb523a Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Sun, 1 Dec 2024 09:55:04 +0000
Subject: [PATCH] Add: Checksums in Python

---
 python/lib.c    | 35 ++++++++++++++++++++++++++++++++++-
 scripts/test.py | 12 +++++++++++-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/python/lib.c b/python/lib.c
index b6f57646..ba435fdb 100644
--- a/python/lib.c
+++ b/python/lib.c
@@ -714,7 +714,40 @@ static PyObject *Str_like_hash(PyObject *self, PyObject *args, PyObject *kwargs)
     }
 
     sz_u64_t result = sz_hash(text.start, text.length);
-    return PyLong_FromSize_t((size_t)result);
+    return PyLong_FromUnsignedLongLong((unsigned long long)result);
+}
+
+static char const doc_like_checksum[] = //
+    "Compute the checksum of individual byte values in a string.\n"
+    "\n"
+    "This function can be called as a method on a Str object or as a standalone function.\n"
+    "Args:\n"
+    " text (Str or str or bytes): The string to checksum.\n"
+    "Returns:\n"
+    " int: The checksum of individual byte values in a string.\n"
+    "Raises:\n"
+    " TypeError: If the argument is not string-like or incorrect number of arguments is provided.";
+
+static PyObject *Str_like_checksum(PyObject *self, PyObject *args, PyObject *kwargs) {
+    // A bound `Str` method takes no positional arguments; the standalone form takes exactly one
+    int is_member = self != NULL && PyObject_TypeCheck(self, &StrType);
+    Py_ssize_t nargs = PyTuple_Size(args);
+    if (nargs != !is_member || kwargs) {
+        PyErr_SetString(PyExc_TypeError, "checksum() expects exactly one positional argument");
+        return NULL;
+    }
+
+    PyObject *text_obj = is_member ? self : PyTuple_GET_ITEM(args, 0);
+    sz_string_view_t text;
+
+    // Validate and convert `text`
+    if (!export_string_like(text_obj, &text.start, &text.length)) {
+        wrap_current_exception("The text argument must be string-like");
+        return NULL;
+    }
+
+    sz_u64_t result = sz_checksum(text.start, text.length);
+    return PyLong_FromUnsignedLongLong((unsigned long long)result);
 }
 
 static char const doc_like_equal[] = //
diff --git a/scripts/test.py b/scripts/test.py
index 82f13376..93a01706 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -767,7 +767,7 @@ def test_translations(length: int):
 
 
 @pytest.mark.repeat(3)
-@pytest.mark.parametrize("length", range(1, 300))
+@pytest.mark.parametrize("length", list(range(0, 300)) + [1024, 4096, 100000])
 @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
 def test_translations_random(length: int):
     body = get_random_string(length=length)
@@ -775,6 +775,16 @@ def test_translations_random(length: int):
     assert sz.translate(body, memoryview(lut)) == baseline_translate(body, lut)
 
 
+@pytest.mark.repeat(3)
+@pytest.mark.parametrize("length", list(range(0, 300)) + [1024, 4096, 100000])
+def test_checksums_random(length: int):
+    def sum_bytes(body: str) -> int:
+        return sum([ord(c) for c in body])
+
+    body = get_random_string(length=length)
+    assert sum_bytes(body) == sz.checksum(body)
+
+
 @pytest.mark.parametrize("list_length", [10, 20, 30, 40, 50])
 @pytest.mark.parametrize("part_length", [5, 10])
 @pytest.mark.parametrize("variability", [2, 3])