Skip to content

Commit 585bbe5

Browse files
committed
feat: compile regexes
1 parent 4ddb387 commit 585bbe5

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

doc/changelog.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Changelog
77
Added
88
^^^^^
99
- Support for Python 3.14.
10+
- Precompile regular expressions as module-level constants.
1011

1112
Removed
1213
^^^^^^^

scim2_models/resources/schema.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@
3535

3636
T = TypeVar("T", bound=BaseModel)
3737

38+
_NON_WORD_OR_LEADING_DIGIT = re.compile(r"\W|^(?=\d)")
39+
3840

3941
def _make_python_identifier(identifier: str) -> str:
4042
"""Sanitize string to be a suitable Python/Pydantic class attribute name."""
41-
sanitized = re.sub(r"\W|^(?=\d)", "", identifier)
43+
sanitized = _NON_WORD_OR_LEADING_DIGIT.sub("", identifier)
4244
if sanitized in RESERVED_WORDS:
4345
sanitized = f"{sanitized}_"
4446

scim2_models/utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
# Python 3.9 has no UnionType
2222
UNION_TYPES = [Union]
2323

24+
_UNDERSCORE_ALPHANUMERIC = re.compile(r"_+([0-9A-Za-z]+)")
25+
_NON_WORD_UNDERSCORE = re.compile(r"[\W_]+")
26+
_VALID_PATH_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9._:\-\[\]"=\s]*$')
27+
2428

2529
def _int_to_str(status: int | None) -> str | None:
2630
return None if status is None else str(status)
@@ -86,7 +90,7 @@ def _to_camel(string: str) -> str:
8690
'$ref' stays '$ref'.
8791
"""
8892
snake = to_snake(string)
89-
camel = re.sub(r"_+([0-9A-Za-z]+)", lambda m: m.group(1).title(), snake)
93+
camel = _UNDERSCORE_ALPHANUMERIC.sub(lambda m: m.group(1).title(), snake)
9094
return camel
9195

9296

@@ -97,7 +101,7 @@ def _normalize_attribute_name(attribute_name: str) -> str:
97101
"""
98102
is_extension_attribute = ":" in attribute_name
99103
if not is_extension_attribute:
100-
attribute_name = re.sub(r"[\W_]+", "", attribute_name)
104+
attribute_name = _NON_WORD_UNDERSCORE.sub("", attribute_name)
101105

102106
return attribute_name.lower()
103107

@@ -121,7 +125,7 @@ def _validate_scim_path_syntax(path: str) -> bool:
121125

122126
# Cannot contain invalid characters (basic check)
123127
# Must start with a letter; then allow alphanumerics, dots, underscores, hyphens, colons (for URNs), brackets, double quotes, equals signs and whitespace
124-
if not re.match(r'^[a-zA-Z][a-zA-Z0-9._:\-\[\]"=\s]*$', path):
128+
if not _VALID_PATH_PATTERN.match(path):
125129
return False
126130

127131
# If it contains a colon, validate it's a proper URN format

0 commit comments

Comments
 (0)