Skip to content

Commit

Permalink
Fix empty line check
Browse files Browse the repository at this point in the history
  • Loading branch information
palemieux committed Feb 10, 2025
1 parent ff75c94 commit 4652bf5
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/main/python/ttconv/vtt/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _make_span(self, parent: model.ContentElement) -> model.Span:
return span


_EMPTY_RE = re.compile(r"\s+")
_EMPTY_RE = re.compile(r"[\n\r]*")
_DEFAULT_FONT_STACK = (styles.GenericFontFamilyType.sansSerif,)
_DEFAULT_FONT_SIZE = styles.LengthType(15 * 5, styles.LengthType.Units.pct) # 5vh for ttp:cellResolution="32 15"
_DEFAULT_TEXT_COLOR = styles.NamedColors.white.value
Expand Down Expand Up @@ -451,7 +451,6 @@ class _State(Enum):

state = _State.START
current_p = None
subtitle_text = None

for line_index, line in enumerate(_none_terminated(lines)):

Expand Down Expand Up @@ -519,7 +518,7 @@ class _State(Enum):
current_p.set_region(_get_or_make_region(doc, cue_params[3:]))

state = _State.TEXT

subtitle_text = None
continue

if state in (_State.TEXT, _State.TEXT_MORE):
Expand All @@ -535,7 +534,6 @@ class _State(Enum):
LOGGER.warning("Ignoring cue due to a spurious blank line at line %s", line_index)

state = _State.LOOKING
subtitle_text = None
continue

if state is _State.TEXT:
Expand Down
22 changes: 22 additions & 0 deletions src/test/python/test_vtt_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,28 @@ def test_malformed_blank_lines(self):
div = list(body[0])
self.assertEqual(len(div), 1)

def test_single_line_with_space(self):
# Regression test: converts a small WebVTT sample with two timed cues and
# checks the shape of the resulting document.
# from https://github.com/sandflow/ttconv/issues/439
# the first cue is not ignored since the first line contains a single space
# NOTE(review): no blank or space-only line is visible inside SAMPLE as shown
# here; presumably that whitespace was lost in this rendering -- confirm
# against the test file in the repository before relying on this text.
SAMPLE = """WEBVTT
Kind: captions
Language: en
00:00:00.799 --> 00:00:02.869 align:start position:0%
hi<00:00:01.040><c> everyone</c><00:00:01.920><c> today</c><00:00:02.240><c> we're</c><00:00:02.399><c> going</c><00:00:02.639><c> to</c><00:00:02.720><c> be</c>
00:00:02.869 --> 00:00:02.879 align:start position:0%
hi everyone today we're going to be
"""

# Feed the sample to the reader through an in-memory text stream.
doc = to_model(io.StringIO(SAMPLE))
self.assertIsNotNone(doc)
# The body is expected to have exactly one child ...
body = list(doc.get_body())
self.assertEqual(len(body), 1)
# ... and that child is expected to have two children (presumably one per
# cue, i.e. neither cue was dropped).
div = list(body[0])
self.assertEqual(len(div), 2)

def test_italic(self):
f = io.StringIO(r"""WEBVTT
Expand Down

0 comments on commit 4652bf5

Please sign in to comment.