From e25f43862bc6c95069b251308bec3375303f3c8f Mon Sep 17 00:00:00 2001 From: Martin Domke Date: Sun, 26 May 2024 00:30:04 +0200 Subject: [PATCH] Stricter validation for user provided ULID values --- CHANGELOG.rst | 9 +++++++++ tests/test_ulid.py | 20 +++++++++++--------- ulid/__init__.py | 16 ++++++++++++++-- ulid/base32.py | 2 ++ 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8fa9f97..8e306c8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,14 @@ Changelog Versions follow `Semantic Versioning `_ +`2.6.0`_ - 2024-05-26 +--------------------- +Changed +~~~~~~~ +* Provide more sophisticated validation when creating ``ULID`` instances from user input. When using + ``ULID.from_str`` we will check if the characters match the base32 alphabet. In general, it is + ensured that the timestamp part of the ULID is not out of range. + `2.5.0`_ - 2024-04-26 --------------------- @@ -159,6 +167,7 @@ Changed * The package now has no external dependencies. * The test-coverage has been raised to 100%. +.. _2.6.0: https://github.com/mdomke/python-ulid/compare/2.5.0...2.6.0 .. _2.5.0: https://github.com/mdomke/python-ulid/compare/2.4.0...2.5.0 .. _2.4.0: https://github.com/mdomke/python-ulid/compare/2.3.0...2.4.0 .. 
_2.3.0: https://github.com/mdomke/python-ulid/compare/2.2.0...2.3.0 diff --git a/tests/test_ulid.py b/tests/test_ulid.py index 0d78b4c..d957bb9 100644 --- a/tests/test_ulid.py +++ b/tests/test_ulid.py @@ -149,15 +149,17 @@ def test_ulid_from_timestamp() -> None: @pytest.mark.parametrize( ("constructor", "value"), [ - (ULID, b"sdf"), - (ULID.from_timestamp, b"not-a-timestamp"), - (ULID.from_datetime, time.time()), - (ULID.from_bytes, b"not-enough"), - (ULID.from_bytes, 123), - (ULID.from_str, "not-enough"), - (ULID.from_str, 123), - (ULID.from_int, "not-an-int"), - (ULID.from_uuid, "not-a-uuid"), + (ULID, b"sdf"), # invalid length + (ULID.from_timestamp, b"not-a-timestamp"), # invalid type + (ULID.from_datetime, time.time()), # invalid type + (ULID.from_bytes, b"not-enough"), # invalid length + (ULID.from_bytes, 123), # invalid type + (ULID.from_str, "not-enough"), # invalid length + (ULID.from_str, 123), # invalid type + (ULID.from_str, "notavalidulidnotavalidulid"), # invalid alphabet + (ULID.from_str, "Z" * 26), # invalid timestamp + (ULID.from_int, "not-an-int"), # invalid type + (ULID.from_uuid, "not-a-uuid"), # invalid type ], ) def test_ulid_invalid_input(constructor: Callable[[Params], ULID], value: Params) -> None: diff --git a/ulid/__init__.py b/ulid/__init__.py index 891d61f..9a4821c 100644 --- a/ulid/__init__.py +++ b/ulid/__init__.py @@ -71,14 +71,26 @@ class ULID: >>> ulid = ULID() >>> str(ulid) '01E75PVKXA3GFABX1M1J9NZZNF' + + Args: + value (bytes, None): A sequence of 16 bytes representing an encoded ULID. + validate (bool): If set to `True` validate if the timestamp part is valid. + + Raises: + ValueError: If the provided value is not a valid encoded ULID. 
""" - def __init__(self, value: bytes | None = None) -> None: + def __init__(self, value: bytes | None = None, validate: bool = True) -> None: if value is not None and len(value) != constants.BYTES_LEN: raise ValueError("ULID has to be exactly 16 bytes long.") self.bytes: bytes = ( value or ULID.from_timestamp(time.time_ns() // constants.NANOSECS_IN_MILLISECS).bytes ) + if value is not None and validate: + try: + self.datetime # noqa: B018 + except ValueError as err: + raise ValueError("ULID timestamp is out of range.") from err @classmethod @validate_type(datetime) @@ -125,7 +137,7 @@ def from_uuid(cls: type[U], value: uuid.UUID) -> U: >>> ULID.from_uuid(uuid4()) ULID(27Q506DP7E9YNRXA0XVD8Z5YSG) """ - return cls(value.bytes) + return cls(value.bytes, validate=False) @classmethod @validate_type(bytes) diff --git a/ulid/base32.py b/ulid/base32.py index f2e7cbc..54fee70 100644 --- a/ulid/base32.py +++ b/ulid/base32.py @@ -198,6 +198,8 @@ def encode_randomness(binary: bytes) -> str: def decode(encoded: str) -> bytes: if len(encoded) != constants.REPR_LEN: raise ValueError("Encoded ULID has to be exactly 26 characters long.") + if any((c not in ENCODE) for c in encoded): + raise ValueError(f"Encoded ULID can only consist of letters in {ENCODE}.") return decode_timestamp(encoded[: constants.TIMESTAMP_REPR_LEN]) + decode_randomness( encoded[constants.TIMESTAMP_REPR_LEN :] )