From 68e1b6386db241d49f7713dae4ba3b59c0d30ba6 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 15:37:12 +0100 Subject: [PATCH 1/7] fix regex --- CHANGELOG.md | 7 +++++++ rich/cells.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3e1a20c1..8e4a24bbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [13.9.3] - 2024-10-22 + +### Fixed + +- Fixed broken regex that may have resulted in poor performance. + ## [13.9.2] - 2024-10-04 ### Fixed diff --git a/rich/cells.py b/rich/cells.py index f85f928f7..93ecf6abc 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -7,7 +7,7 @@ from ._cell_widths import CELL_WIDTHS # Regex to match sequence of the most common character ranges -_is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match +_is_single_cell_widths = re.compile("^[\u0020-\u007f\u00a0\u02ff\u0370-\u0482]*$").match @lru_cache(4096) From db2e3e89307d30e4096686083f03ef5689572bdf Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 15:43:33 +0100 Subject: [PATCH 2/7] assert cut --- rich/segment.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rich/segment.py b/rich/segment.py index 0f0676876..edcb52dd3 100644 --- a/rich/segment.py +++ b/rich/segment.py @@ -161,10 +161,14 @@ def split_cells(self, cut: int) -> Tuple["Segment", "Segment"]: If the cut point falls in the middle of a 2-cell wide character then it is replaced by two spaces, to preserve the display width of the parent segment. + Args: + cut (int): Offset within the segment to cut. + Returns: Tuple[Segment, Segment]: Two segments. """ text, style, control = self + assert cut >= 0 if _is_single_cell_widths(text): # Fast path with all 1 cell characters From ad6b8865ff88bcaddb42573befd32ce1c0ae170d Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 15:45:57 +0100 Subject: [PATCH 3/7] version bump --- CHANGELOG.md | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e4a24bbf..182d6c591 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2104,6 +2104,7 @@ Major version bump for a breaking change to `Text.stylize signature`, which corr - First official release, API still to be stabilized +[13.9.3]: https://github.com/textualize/rich/compare/v13.9.2...v13.9.3 [13.9.2]: https://github.com/textualize/rich/compare/v13.9.1...v13.9.2 [13.9.1]: https://github.com/textualize/rich/compare/v13.9.0...v13.9.1 [13.9.0]: https://github.com/textualize/rich/compare/v13.8.1...v13.9.0 diff --git a/pyproject.toml b/pyproject.toml index e1adb8e62..701232853 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "rich" homepage = "https://github.com/Textualize/rich" documentation = "https://rich.readthedocs.io/en/latest/" -version = "13.9.2" +version = "13.9.3" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" authors = ["Will McGugan "] license = "MIT" From be42f1b082477f1ec4bdf5d05da7145c78418cda Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 16:10:04 +0100 Subject: [PATCH 4/7] test and added box drawing characters --- rich/cells.py | 4 +++- tests/test_segment.py | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index 93ecf6abc..af8e81c7b 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -7,7 +7,9 @@ from ._cell_widths import CELL_WIDTHS # Regex to match sequence of the most common character ranges -_is_single_cell_widths = re.compile("^[\u0020-\u007f\u00a0\u02ff\u0370-\u0482]*$").match +_is_single_cell_widths = re.compile( + "^[\u0020-\u007f\u00a0\u02ff\u0370-\u0482\u2500-\u25FF]*$" +).match @lru_cache(4096) diff --git a/tests/test_segment.py b/tests/test_segment.py index 2264dbe50..a9e649410 100644 --- a/tests/test_segment.py +++ b/tests/test_segment.py @@ -1,9 +1,16 @@ +import string from io import StringIO import pytest from rich.cells import cell_len -from rich.segment import ControlType, Segment, SegmentLines, Segments +from rich.segment import ( + ControlType, + Segment, + SegmentLines, + Segments, + _is_single_cell_widths, +) from rich.style import Style @@ -378,3 +385,22 @@ def test_align_bottom(): [Segment(" ", Style())], [Segment("X")], ] + + +def test_is_single_cell_widths() -> None: + # Check _is_single_cell_widths reports correctly + for character in string.printable: + if ord(character) >= 32: + assert _is_single_cell_widths(character) + + BOX = "┌─┬┐│ ││├─┼┤│ ││├─┼┤├─┼┤│ ││└─┴┘" + + for character in BOX: + print(repr(character)) + assert _is_single_cell_widths(character) + + for character in "💩": + assert not _is_single_cell_widths(character) + + for character in "わさび": + assert not _is_single_cell_widths(character) From b93d3b6d98547b88320a5c82c362d5825bcdca4b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 16:13:14 +0100 Subject: [PATCH 5/7] test single cell widths --- rich/cells.py | 2 +- tests/test_cells.py | 23 ++++++++++++++++++++++- tests/test_segment.py | 28 +--------------------------- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index af8e81c7b..4debea0ca 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -8,7 +8,7 @@ # Regex to match sequence of the most common character ranges _is_single_cell_widths = re.compile( - "^[\u0020-\u007f\u00a0\u02ff\u0370-\u0482\u2500-\u25FF]*$" + "^[\u0020-\u007e\u00a0\u02ff\u0370-\u0482\u2500-\u25FF]*$" ).match diff --git a/tests/test_cells.py b/tests/test_cells.py index fe451fa57..d694fb379 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -1,5 +1,7 @@ +import string + from rich import cells -from rich.cells import chop_cells +from rich.cells import _is_single_cell_widths, chop_cells def test_cell_len_long_string(): @@ -59,3 +61,22 @@ def test_chop_cells_mixed_width(): """Mixed single and double-width characters.""" text = "あ1り234が5と6う78" assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"] + + +def test_is_single_cell_widths() -> None: + # Check _is_single_cell_widths reports correctly + for character in string.printable: + if ord(character) >= 32: + assert _is_single_cell_widths(character) + + BOX = "┌─┬┐│ ││├─┼┤│ ││├─┼┤├─┼┤│ ││└─┴┘" + + for character in BOX: + print(repr(character)) + assert _is_single_cell_widths(character) + + for character in "💩": + assert not _is_single_cell_widths(character) + + for character in "わさび": + assert not _is_single_cell_widths(character) diff --git a/tests/test_segment.py b/tests/test_segment.py index a9e649410..2264dbe50 100644 --- a/tests/test_segment.py +++ b/tests/test_segment.py @@ -1,16 +1,9 @@ -import string from io import StringIO import pytest from rich.cells import cell_len -from rich.segment import ( - ControlType, - Segment, - SegmentLines, - Segments, - _is_single_cell_widths, -) +from rich.segment import ControlType, Segment, SegmentLines, Segments from rich.style import Style @@ -385,22 +378,3 @@ def test_align_bottom(): [Segment(" ", Style())], [Segment("X")], ] - - -def test_is_single_cell_widths() -> None: - # Check _is_single_cell_widths reports correctly - for character in string.printable: - if ord(character) >= 32: - assert _is_single_cell_widths(character) - - BOX = "┌─┬┐│ ││├─┼┤│ ││├─┼┤├─┼┤│ ││└─┴┘" - - for character in BOX: - print(repr(character)) - assert _is_single_cell_widths(character) - - for character in "💩": - assert not _is_single_cell_widths(character) - - for character in "わさび": - assert not _is_single_cell_widths(character) From 04db8c2946373fdd3249cb2a6889481a43305aed Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 16:23:41 +0100 Subject: [PATCH 6/7] update regex --- rich/cells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rich/cells.py b/rich/cells.py index 4debea0ca..17793eb5e 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -8,7 +8,7 @@ # Regex to match sequence of the most common character ranges _is_single_cell_widths = re.compile( - "^[\u0020-\u007e\u00a0\u02ff\u0370-\u0482\u2500-\u25FF]*$" + "^[\u0020-\u007e\u00a0-\u02ff\u0370-\u0482\u2500-\u25FF]*$" ).match From 60f3b615a706949f6ae9734eeaea519573af4522 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 22 Oct 2024 16:29:46 +0100 Subject: [PATCH 7/7] changelog --- CHANGELOG.md | 2 +- tests/test_cells.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 182d6c591..a2a537279 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Fixed broken regex that may have resulted in poor performance. +- Fixed broken regex that may have resulted in poor performance. https://github.com/Textualize/rich/pull/3535 ## [13.9.2] - 2024-10-04 diff --git a/tests/test_cells.py b/tests/test_cells.py index d694fb379..8ba8169e3 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -72,10 +72,9 @@ def test_is_single_cell_widths() -> None: BOX = "┌─┬┐│ ││├─┼┤│ ││├─┼┤├─┼┤│ ││└─┴┘" for character in BOX: - print(repr(character)) assert _is_single_cell_widths(character) - for character in "💩": + for character in "💩😽": assert not _is_single_cell_widths(character) for character in "わさび":