From f095d5fd621343b11aaea5f13a307d485bad790f Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Thu, 18 Sep 2025 18:12:59 +0000 Subject: [PATCH 01/13] feat: optimize equality check with string literals --- mypyc/irbuild/ll_builder.py | 16 +++++++++++++++- mypyc/lib-rt/str_ops.c | 29 +++++++++++++++++++++-------- mypyc/primitives/str_ops.py | 8 ++++++++ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index 4b85c13892c1..e290b7c91adb 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -185,6 +185,7 @@ from mypyc.primitives.str_ops import ( str_check_if_true, str_eq, + str_eq_literal, str_ssize_t_size_op, unicode_compare, ) @@ -1551,9 +1552,22 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) - def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value: """Compare two strings""" if op == "==": + if isinstance(lhs, LoadLiteral) and is_str_rprimitive(lhs.type): + literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) + return self.primitive_op(str_eq_literal, [rhs, lhs, literal_length], line) + elif isinstance(rhs, LoadLiteral) and is_str_rprimitive(rhs.type): + literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) + return self.primitive_op(str_eq_literal, [lhs, rhs, literal_length], line) return self.primitive_op(str_eq, [lhs, rhs], line) elif op == "!=": - eq = self.primitive_op(str_eq, [lhs, rhs], line) + if isinstance(lhs, LoadLiteral) and is_str_rprimitive(lhs.type): + literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) + eq = self.primitive_op(str_eq_literal, [rhs, lhs, literal_length]) + elif isinstance(rhs, LoadLiteral) and is_str_rprimitive(rhs.type): + literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) + eq = self.primitive_op(str_eq_literal, [lhs, rhs, literal_length]) + else: + eq = self.primitive_op(str_eq, [lhs, rhs], line) return self.add(ComparisonOp(eq, self.false(), ComparisonOp.EQ, line)) # TODO: modify 'str' to use same interface as 'compare_bytes' as it would avoid diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index 337ef14fc955..01e85002f9f7 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -64,20 +64,33 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) #undef BLOOM_UPDATE } -// Adapted from CPython 3.13.1 (_PyUnicode_Equal) -char CPyStr_Equal(PyObject *str1, PyObject *str2) { - if (str1 == str2) { - return 1; - } - Py_ssize_t len = PyUnicode_GET_LENGTH(str1); - if (PyUnicode_GET_LENGTH(str2) != len) +static char _CPyStr_Equal_NoIdentCheck(PyObject *str1, PyObject *str2, Py_ssize_t str2_length) { + // This helper function only exists to deduplicate code + Py_ssize_t str1_length = PyUnicode_GET_LENGTH(str1); + if (str1_length != str2_length) return 0; int kind = PyUnicode_KIND(str1); if (PyUnicode_KIND(str2) != kind) return 0; const void *data1 = PyUnicode_DATA(str1); const void *data2 = PyUnicode_DATA(str2); - return memcmp(data1, data2, len * kind) == 0; + return memcmp(data1, data2, str1_length * kind) == 0; +} + +// Adapted from CPython 3.13.1 (_PyUnicode_Equal) +char CPyStr_Equal(PyObject *str1, PyObject *str2) { + if (str1 == str2) { + return 1; + } + Py_ssize_t str2_length = PyUnicode_GET_LENGTH(str2); + return _CPyStr_Equal_NoIdentCheck(str1, str2, str2_length); +} + +char CPyStr_EqualLiteral(PyObject *str1, PyObject *str2, Py_ssize_t literal_length) { + if (str1 == str2) { + return 1; + } + return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length) } PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) { diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py index a8f4e4df74c2..d39f1f872763 100644 --- a/mypyc/primitives/str_ops.py +++ b/mypyc/primitives/str_ops.py @@ -88,6 +88,14 @@ error_kind=ERR_NEVER, ) +str_eq_literal = custom_primitive_op( + name="str_eq_literal", + c_function_name="CPyStr_EqualLiteral", + arg_types=[str_rprimitive, str_rprimitive, c_pyssize_t_rprimitive], + return_type=bool_rprimitive, + error_kind=ERR_NEVER, +) + unicode_compare = custom_op( arg_types=[str_rprimitive, str_rprimitive], return_type=c_int_rprimitive, From c3c04a57b61780e97c3b881919aaf7fec14f122f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:20:41 +0000 Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/lib-rt/str_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index 01e85002f9f7..fb4e956ad4cf 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -90,7 +90,7 @@ char CPyStr_EqualLiteral(PyObject *str1, PyObject *str2, Py_ssize_t literal_leng if (str1 == str2) { return 1; } - return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length) + return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length) } PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) { From 6528245920f0f9fa3a36ee4295432f95d7f54491 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:36:13 -0400 Subject: [PATCH 03/13] refactor --- mypyc/irbuild/ll_builder.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index e290b7c91adb..868f8eac8618 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -1551,21 +1551,31 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) - def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value: """Compare two strings""" + + def is_string_literal(value: Value) -> bool: + return isinstance(value, LoadLiteral) and is_str_rprimitive(value.type) + if op == "==": - if isinstance(lhs, LoadLiteral) and is_str_rprimitive(lhs.type): - literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) + if is_string_literal(lhs): + if is_string_literal(rhs): + # we can optimize out the check entirely in some Final cases + return self.true() if lhs.value == rhs.value else self.false() + literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) # type: ignore [arg-type] return self.primitive_op(str_eq_literal, [rhs, lhs, literal_length], line) - elif isinstance(rhs, LoadLiteral) and is_str_rprimitive(rhs.type): - literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) + elif is_string_literal(rhs): + literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) # type: ignore [arg-type] return self.primitive_op(str_eq_literal, [lhs, rhs, literal_length], line) return self.primitive_op(str_eq, [lhs, rhs], line) elif op == "!=": - if isinstance(lhs, LoadLiteral) and is_str_rprimitive(lhs.type): - literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) - eq = self.primitive_op(str_eq_literal, [rhs, lhs, literal_length]) - elif isinstance(rhs, LoadLiteral) and is_str_rprimitive(rhs.type): - literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) - eq = self.primitive_op(str_eq_literal, [lhs, rhs, literal_length]) + if is_string_literal(lhs): + if is_string_literal(rhs): + # we can optimize out the check entirely in some Final cases + return self.true() if lhs.value != rhs.value else self.false() + literal_length = Integer(len(lhs.value), c_pyssize_t_rprimitive, line) # type: ignore [arg-type] + eq = self.primitive_op(str_eq_literal, [rhs, lhs, literal_length], line) + elif is_string_literal(rhs): + literal_length = Integer(len(rhs.value), c_pyssize_t_rprimitive, line) # type: ignore [arg-type] + eq = self.primitive_op(str_eq_literal, [lhs, rhs, literal_length], line) else: eq = self.primitive_op(str_eq, [lhs, rhs], line) return self.add(ComparisonOp(eq, self.false(), ComparisonOp.EQ, line)) From d781dde9471dea619b9e905429f448ea43b76adb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:37:40 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/irbuild/ll_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index 868f8eac8618..891ca93eb6e3 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -1551,10 +1551,10 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) - def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value: """Compare two strings""" - + def is_string_literal(value: Value) -> bool: return isinstance(value, LoadLiteral) and is_str_rprimitive(value.type) - + if op == "==": if is_string_literal(lhs): if is_string_literal(rhs): From 2580b10f83d735f508c8ee0c9eeb88342ea8e0d5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:41:02 -0400 Subject: [PATCH 05/13] Update ll_builder.py --- mypyc/irbuild/ll_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index 891ca93eb6e3..d71c480b961f 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -8,7 +8,7 @@ import sys from collections.abc import Sequence -from typing import Callable, Final, Optional +from typing import Callable, Final, Optional, TypeGuard from mypy.argmap import map_actuals_to_formals from mypy.nodes import ARG_POS, ARG_STAR, ARG_STAR2, ArgKind @@ -1552,7 +1552,7 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) - def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value: """Compare two strings""" - def is_string_literal(value: Value) -> bool: + def is_string_literal(value: Value) -> TypeGuard[LoadLiteral]: return isinstance(value, LoadLiteral) and is_str_rprimitive(value.type) if op == "==": From fea651dc8b9df12cb2d965d5ee6575ee299fb458 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:41:44 -0400 Subject: [PATCH 06/13] Update ll_builder.py --- mypyc/irbuild/ll_builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index d71c480b961f..bdd2eeeda515 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -8,7 +8,9 @@ import sys from collections.abc import Sequence -from typing import Callable, Final, Optional, TypeGuard +from typing import Callable, Final, Optional + +from typing_extensions import TypeGuard from mypy.argmap import map_actuals_to_formals from mypy.nodes import ARG_POS, ARG_STAR, ARG_STAR2, ArgKind From 9ed369cefa50af65ccab22b6a41482bf337b321a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:45:47 +0000 Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/irbuild/ll_builder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index bdd2eeeda515..0a58369d10b2 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -9,7 +9,6 @@ import sys from collections.abc import Sequence from typing import Callable, Final, Optional - from typing_extensions import TypeGuard from mypy.argmap import map_actuals_to_formals From fb2118795dc0eb019b26cb3162f572024f87f6f1 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:54:57 -0400 Subject: [PATCH 08/13] fix: missing ; --- mypyc/lib-rt/str_ops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index fb4e956ad4cf..d61e14f96e14 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -65,7 +65,7 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) } static char _CPyStr_Equal_NoIdentCheck(PyObject *str1, PyObject *str2, Py_ssize_t str2_length) { - // This helper function only exists to deduplicate code + // This helper function only exists to deduplicate code in CPyStr_Equal and CPyStr_EqualLiteral Py_ssize_t str1_length = PyUnicode_GET_LENGTH(str1); if (str1_length != str2_length) return 0; @@ -86,11 +86,11 @@ char CPyStr_Equal(PyObject *str1, PyObject *str2) { return _CPyStr_Equal_NoIdentCheck(str1, str2, str2_length); } -char CPyStr_EqualLiteral(PyObject *str1, PyObject *str2, Py_ssize_t literal_length) { - if (str1 == str2) { +char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length) { + if (str == literal_str) { return 1; } - return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length) + return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length); } PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) { From a0d36ec5597bd2f6b7694d4bfccc73ff3ce8083b Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:03:19 -0400 Subject: [PATCH 09/13] fix name err --- mypyc/lib-rt/str_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index d61e14f96e14..abc0de5db2d8 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -90,7 +90,7 @@ char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t litera if (str == literal_str) { return 1; } - return _CPyStr_Equal_NoIdentCheck(str1, str2, literal_length); + return _CPyStr_Equal_NoIdentCheck(str, literal_str, literal_length); } PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) { From 577cd74ebb7ca9a189f7897251c498c3e55f1b42 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:21:23 -0400 Subject: [PATCH 10/13] Update CPy.h --- mypyc/lib-rt/CPy.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 5dec7509ac7b..d75ae31b6662 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -734,6 +734,7 @@ static inline char CPyDict_CheckSize(PyObject *dict, Py_ssize_t size) { #define BOTHSTRIP 2 char CPyStr_Equal(PyObject *str1, PyObject *str2); +char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length); PyObject *CPyStr_Build(Py_ssize_t len, ...); PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index); PyObject *CPyStr_GetItemUnsafe(PyObject *str, Py_ssize_t index); From 9bec5811fa255d36896f03e37dcc03e1b1a79956 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:24:29 -0400 Subject: [PATCH 11/13] Update irbuild-dict.test --- mypyc/test-data/irbuild-dict.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/test-data/irbuild-dict.test b/mypyc/test-data/irbuild-dict.test index e0c014f07813..e7a330951ab0 100644 --- a/mypyc/test-data/irbuild-dict.test +++ b/mypyc/test-data/irbuild-dict.test @@ -410,7 +410,7 @@ L2: k = r8 v = r7 r9 = 'name' - r10 = CPyStr_Equal(k, r9) + r10 = CPyStr_EqualLiteral(k, r9, 4) if r10 goto L3 else goto L4 :: bool L3: name = v From bdee878ae78dc78d7be60e18a84665b0e6d36f94 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:25:09 -0400 Subject: [PATCH 12/13] Update irbuild-unreachable.test --- mypyc/test-data/irbuild-unreachable.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/test-data/irbuild-unreachable.test b/mypyc/test-data/irbuild-unreachable.test index a4f1ef8c7dba..8eafede66b56 100644 --- a/mypyc/test-data/irbuild-unreachable.test +++ b/mypyc/test-data/irbuild-unreachable.test @@ -20,7 +20,7 @@ L0: r2 = CPyObject_GetAttr(r0, r1) r3 = cast(str, r2) r4 = 'x' - r5 = CPyStr_Equal(r3, r4) + r5 = CPyStr_EqualLiteral(r3, r4, 1) if r5 goto L2 else goto L1 :: bool L1: r6 = r5 @@ -54,7 +54,7 @@ L0: r2 = CPyObject_GetAttr(r0, r1) r3 = cast(str, r2) r4 = 'x' - r5 = CPyStr_Equal(r3, r4) + r5 = CPyStr_EqualLiteral(r3, r4, 1) if r5 goto L2 else goto L1 :: bool L1: r6 = r5 From 613f644e2f6abfeb3eb693053e9c84e66ebf5b74 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:35:53 -0400 Subject: [PATCH 13/13] Update irbuild-classes.test --- mypyc/test-data/irbuild-classes.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/test-data/irbuild-classes.test b/mypyc/test-data/irbuild-classes.test index a98b3a7d3dcf..b2313ccba911 100644 --- a/mypyc/test-data/irbuild-classes.test +++ b/mypyc/test-data/irbuild-classes.test @@ -2302,7 +2302,7 @@ def SetAttr.__setattr__(self, key, val): r12 :: bit L0: r0 = 'regular_attr' - r1 = CPyStr_Equal(key, r0) + r1 = CPyStr_EqualLiteral(key, r0, 12) if r1 goto L1 else goto L2 :: bool L1: r2 = unbox(int, val) @@ -2310,7 +2310,7 @@ L1: goto L6 L2: r4 = 'class_var' - r5 = CPyStr_Equal(key, r4) + r5 = CPyStr_EqualLiteral(key, r4, 9) if r5 goto L3 else goto L4 :: bool L3: r6 = builtins :: module