From 4652bf541b7cb50bc5d3a33c86b0f66681c81da7 Mon Sep 17 00:00:00 2001 From: Pierre-Anthony Lemieux Date: Mon, 10 Feb 2025 10:28:21 -0800 Subject: [PATCH] Fix empty line check --- src/main/python/ttconv/vtt/reader.py | 6 ++---- src/test/python/test_vtt_reader.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/main/python/ttconv/vtt/reader.py b/src/main/python/ttconv/vtt/reader.py index ad0728cd..86e1e54d 100644 --- a/src/main/python/ttconv/vtt/reader.py +++ b/src/main/python/ttconv/vtt/reader.py @@ -191,7 +191,7 @@ def _make_span(self, parent: model.ContentElement) -> model.Span: return span -_EMPTY_RE = re.compile(r"\s+") +_EMPTY_RE = re.compile(r"[\n\r]*") _DEFAULT_FONT_STACK = (styles.GenericFontFamilyType.sansSerif,) _DEFAULT_FONT_SIZE = styles.LengthType(15 * 5, styles.LengthType.Units.pct) # 5vh for ttp:cellResolution="32 15" _DEFAULT_TEXT_COLOR = styles.NamedColors.white.value @@ -451,7 +451,6 @@ class _State(Enum): state = _State.START current_p = None - subtitle_text = None for line_index, line in enumerate(_none_terminated(lines)): @@ -519,7 +518,7 @@ class _State(Enum): current_p.set_region(_get_or_make_region(doc, cue_params[3:])) state = _State.TEXT - + subtitle_text = None continue if state in (_State.TEXT, _State.TEXT_MORE): @@ -535,7 +534,6 @@ class _State(Enum): LOGGER.warning("Ignoring cue due to a spurious blank line at line %s", line_index) state = _State.LOOKING - subtitle_text = None continue if state is _State.TEXT: diff --git a/src/test/python/test_vtt_reader.py b/src/test/python/test_vtt_reader.py index 677a3a72..013932c8 100644 --- a/src/test/python/test_vtt_reader.py +++ b/src/test/python/test_vtt_reader.py @@ -119,6 +119,28 @@ def test_malformed_blank_lines(self): div = list(body[0]) self.assertEqual(len(div), 1) + def test_single_line_with_space(self): + # from https://github.com/sandflow/ttconv/issues/439 + # the first cue is not ignored since the first line contains a single space + SAMPLE = """WEBVTT +Kind: captions +Language: en + +00:00:00.799 --> 00:00:02.869 align:start position:0% + +hi<00:00:01.040> everyone<00:00:01.920> today<00:00:02.240> we're<00:00:02.399> going<00:00:02.639> to<00:00:02.720> be + +00:00:02.869 --> 00:00:02.879 align:start position:0% +hi everyone today we're going to be +""" + + doc = to_model(io.StringIO(SAMPLE)) + self.assertIsNotNone(doc) + body = list(doc.get_body()) + self.assertEqual(len(body), 1) + div = list(body[0]) + self.assertEqual(len(div), 2) + def test_italic(self): f = io.StringIO(r"""WEBVTT