Skip to content

Commit

Permalink
Stricter validation for user provided ULID values
Browse files Browse the repository at this point in the history
  • Loading branch information
mdomke committed May 25, 2024
1 parent 76d5740 commit e25f438
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 11 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Changelog

Versions follow `Semantic Versioning <http://www.semver.org>`_

`2.6.0`_ - 2024-05-26
---------------------
Changed
~~~~~~~
* Provide stricter validation when creating ``ULID`` instances from user input. ``ULID.from_str``
now checks that every character belongs to the base32 alphabet. In general, it is also ensured
that the timestamp part of the ULID is not out of range.

`2.5.0`_ - 2024-04-26
---------------------

Expand Down Expand Up @@ -159,6 +167,7 @@ Changed
* The package now has no external dependencies.
* The test-coverage has been raised to 100%.

.. _2.6.0: https://github.com/mdomke/python-ulid/compare/2.5.0...2.6.0
.. _2.5.0: https://github.com/mdomke/python-ulid/compare/2.4.0...2.5.0
.. _2.4.0: https://github.com/mdomke/python-ulid/compare/2.3.0...2.4.0
.. _2.3.0: https://github.com/mdomke/python-ulid/compare/2.2.0...2.3.0
Expand Down
20 changes: 11 additions & 9 deletions tests/test_ulid.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,17 @@ def test_ulid_from_timestamp() -> None:
@pytest.mark.parametrize(
("constructor", "value"),
[
(ULID, b"sdf"),
(ULID.from_timestamp, b"not-a-timestamp"),
(ULID.from_datetime, time.time()),
(ULID.from_bytes, b"not-enough"),
(ULID.from_bytes, 123),
(ULID.from_str, "not-enough"),
(ULID.from_str, 123),
(ULID.from_int, "not-an-int"),
(ULID.from_uuid, "not-a-uuid"),
(ULID, b"sdf"), # invalid length
(ULID.from_timestamp, b"not-a-timestamp"), # invalid type
(ULID.from_datetime, time.time()), # invalid type
(ULID.from_bytes, b"not-enough"), # invalid length
(ULID.from_bytes, 123), # invalid type
(ULID.from_str, "not-enough"), # invalid length
(ULID.from_str, 123), # invalid type
(ULID.from_str, "notavalidulidnotavalidulid"), # invalid alphabet
(ULID.from_str, "Z" * 26), # invalid timestamp
(ULID.from_int, "not-an-int"), # invalid type
(ULID.from_uuid, "not-a-uuid"), # invalid type
],
)
def test_ulid_invalid_input(constructor: Callable[[Params], ULID], value: Params) -> None:
Expand Down
16 changes: 14 additions & 2 deletions ulid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,26 @@ class ULID:
>>> ulid = ULID()
>>> str(ulid)
'01E75PVKXA3GFABX1M1J9NZZNF'
Args:
value (bytes, None): A sequence of 16 bytes representing an encoded ULID.
validate (bool): If set to `True` validate if the timestamp part is valid.
Raises:
ValueError: If the provided value is not a valid encoded ULID.
"""

def __init__(self, value: bytes | None = None) -> None:
def __init__(self, value: bytes | None = None, validate: bool = True) -> None:
    """Create a ULID from its 16-byte encoding, or from the current time.

    Args:
        value: The encoded ULID bytes. When no value is given, a new ULID
            is built with ``ULID.from_timestamp`` from ``time.time_ns()``
            converted to milliseconds.
        validate: When true and ``value`` is given, reject values for
            which reading the ``datetime`` property raises ``ValueError``.

    Raises:
        ValueError: If ``value`` does not have the expected length, or if
            validation is enabled and the timestamp part is out of range.
    """
    if value is not None:
        if len(value) != constants.BYTES_LEN:
            raise ValueError("ULID has to be exactly 16 bytes long.")

    if value:
        self.bytes: bytes = value
    else:
        now_ms = time.time_ns() // constants.NANOSECS_IN_MILLISECS
        self.bytes = ULID.from_timestamp(now_ms).bytes

    # Freshly generated ULIDs are not re-checked; only caller-supplied
    # bytes go through the timestamp validation below.
    if value is None or not validate:
        return

    try:
        # Evaluated only for its side effect: the property raises if the
        # timestamp cannot be represented.
        self.datetime  # noqa: B018
    except ValueError as err:
        raise ValueError("ULID timestamp is out of range.") from err

@classmethod
@validate_type(datetime)
Expand Down Expand Up @@ -125,7 +137,7 @@ def from_uuid(cls: type[U], value: uuid.UUID) -> U:
>>> ULID.from_uuid(uuid4())
ULID(27Q506DP7E9YNRXA0XVD8Z5YSG)
"""
return cls(value.bytes)
return cls(value.bytes, validate=False)

@classmethod
@validate_type(bytes)
Expand Down
2 changes: 2 additions & 0 deletions ulid/base32.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def encode_randomness(binary: bytes) -> str:
def decode(encoded: str) -> bytes:
    """Decode a textual ULID into its binary form.

    The input is split into a timestamp part (the first
    ``constants.TIMESTAMP_REPR_LEN`` characters) and a randomness part (the
    remainder). Each part is decoded separately and the results are
    concatenated.

    Args:
        encoded: The string representation of a ULID.

    Returns:
        The decoded timestamp bytes followed by the decoded randomness bytes.

    Raises:
        ValueError: If ``encoded`` does not have ``constants.REPR_LEN``
            characters, or contains a character that is not in ``ENCODE``.
    """
    if len(encoded) != constants.REPR_LEN:
        raise ValueError("Encoded ULID has to be exactly 26 characters long.")
    if not all(char in ENCODE for char in encoded):
        raise ValueError(f"Encoded ULID can only consist of letters in {ENCODE}.")

    split_at = constants.TIMESTAMP_REPR_LEN
    timestamp_part = encoded[:split_at]
    randomness_part = encoded[split_at:]
    return decode_timestamp(timestamp_part) + decode_randomness(randomness_part)
Expand Down

0 comments on commit e25f438

Please sign in to comment.