Skip to content

Commit

Permalink
fix(tests): silence all remaining noisy test suites
Browse files (browse the repository at this point in the history)
  • Loading branch information
joanise committed Nov 7, 2024
1 parent 99da050 commit 806f50c
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 76 deletions.
31 changes: 18 additions & 13 deletions test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""

import os
from contextlib import redirect_stderr
from io import StringIO
from unittest import main

import click
Expand All @@ -23,13 +25,14 @@ def test_call_align(self):
# API accepts them too.
langs = ("fra",) # make sure language can be an iterable, not just a list.
with SoundSwallowerStub("t0b0d0p0s0w0:920:1520", "t0b0d0p0s1w0:1620:1690"):
(status, exception, log) = api.align(
self.data_dir / "ej-fra.txt",
self.data_dir / "ej-fra.m4a",
self.tempdir / "output",
langs,
output_formats=["html", "TextGrid", "srt"],
)
with redirect_stderr(StringIO()):
(status, exception, log) = api.align(
self.data_dir / "ej-fra.txt",
self.data_dir / "ej-fra.m4a",
self.tempdir / "output",
langs,
output_formats=["html", "TextGrid", "srt"],
)
self.assertEqual(status, 0)
self.assertTrue(exception is None)
self.assertIn("Words (<w>) not present; tokenizing", log)
Expand All @@ -53,16 +56,18 @@ def test_call_align(self):
"Make sure the API call doesn't not modify my variables",
)

(status, exception, log) = api.align("", "", self.tempdir / "errors")
with redirect_stderr(StringIO()):
(status, exception, log) = api.align("", "", self.tempdir / "errors")
self.assertNotEqual(status, 0)
self.assertFalse(exception is None)

def test_call_make_xml(self):
(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt",
self.tempdir / "prepared.readalong",
("fra", "eng"),
)
with redirect_stderr(StringIO()):
(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt",
self.tempdir / "prepared.readalong",
("fra", "eng"),
)
self.assertEqual(status, 0)
self.assertTrue(exception is None)
self.assertIn("Wrote ", log)
Expand Down
3 changes: 0 additions & 3 deletions test/test_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def align(self, input_text_path, input_audio_path, output_path, flags):
input_audio_path,
output_path,
] + flags
LOGGER.info(
f"Aligning {input_text_path} and {input_audio_path}, outputting to {output_path}"
)
return run(args, capture_output=True, check=False, encoding="utf-8")

def test_mute_section(self):
Expand Down
17 changes: 12 additions & 5 deletions test/test_dna_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Test handling of DNA text in tokenization"""

from contextlib import redirect_stderr
from io import StringIO
from unittest import main

from basic_test_case import BasicTestCase
Expand All @@ -23,7 +25,8 @@ def test_tok_all_words(self):
<s>Voici une deuxième phrase.</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print(etree.tounicode(tokenized))

Expand Down Expand Up @@ -54,7 +57,8 @@ def test_tok_some_words(self):
<s>Un <foo do-not-align="1">mot ou deux</foo> à exclure.</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print('as_txt="' + as_txt +'"')

Expand Down Expand Up @@ -96,7 +100,8 @@ def test_tok_div_p_s(self):
</div>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print('as_txt="' + as_txt +'"')

Expand Down Expand Up @@ -143,15 +148,17 @@ def test_dna_word(self):

txt = """<s xml:lang="fra">Une <w do-not-align="true">exclude</w> phrase.</s>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
self.assertRaises(RuntimeError, add_ids, tokenized)

def test_dna_word_nested(self):
"""You also can't have a <w> element inside a DNA element"""

txt = """<s xml:lang="fra">Une <foo do-not-align="true"><bar><w>exclude</w></bar></foo> phrase.</s>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
self.assertRaises(RuntimeError, add_ids, tokenized)


Expand Down
17 changes: 11 additions & 6 deletions test/test_g2p_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import os
import re
from contextlib import redirect_stderr
from io import StringIO
from unittest import main

from basic_test_case import BasicTestCase
Expand Down Expand Up @@ -303,8 +305,9 @@ def test_align_with_invalid_preg2p(self):
self.assertIn('<w ARPABET="NOT ARPABET" id="s0w2">error</w>', results.output)

audio_file = os.path.join(self.data_dir, "ej-fra.m4a")
with self.assertRaises(RuntimeError) as e:
results = align_audio(input_file, audio_file)
with redirect_stderr(StringIO()):
with self.assertRaises(RuntimeError) as e:
results = align_audio(input_file, audio_file)
self.assertIn("could not be g2p'd", str(e.exception))

def test_align_with_preg2p(self):
Expand All @@ -330,9 +333,10 @@ def test_align_with_preg2p(self):
"t0b0d0p0s3w2:15:16",
"t0b0d0p0s3w3:16:17",
):
_ = align_audio(
text_file, audio_file, save_temps=os.path.join(self.tempdir, "foo")
)
with redirect_stderr(StringIO()):
_ = align_audio(
text_file, audio_file, save_temps=os.path.join(self.tempdir, "foo")
)
with open(os.path.join(self.tempdir, "foo.dict"), "r", encoding="utf8") as f:
dict_file = f.read()
self.assertIn("S AH S IY", dict_file) # "ceci" in fra
Expand Down Expand Up @@ -452,7 +456,8 @@ def test_convert_xml_invalid(self):
self.assertTrue(valid, "convert_xml with valid pre-g2p'd text")

xml = parse_xml('<s><w ARPABET="invalid">invalid</w></s>')
c_xml, valid = convert_xml(xml)
with redirect_stderr(StringIO()):
c_xml, valid = convert_xml(xml)
self.assertEqual(
etree.tounicode(c_xml), '<s><w ARPABET="invalid">invalid</w></s>'
)
Expand Down
14 changes: 10 additions & 4 deletions test/test_tokenize_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Unit test suite for our XML tokenizer module"""

from contextlib import redirect_stderr
from io import StringIO
from unittest import TestCase, main

from lxml import etree
Expand All @@ -23,7 +25,8 @@ def test_simple(self):
<s xml:lang="atj"><w>Kwei</w>! <w>Tan</w> <w>e</w> <w>ici</w> <w>matisihin</w>?</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
# print(etree.tounicode(tokenized))
self.assertEqual(etree.tounicode(tokenized), ref)

Expand All @@ -39,7 +42,8 @@ def test_mixed_lang(self):
<s xml:lang="fra"><w>Bonjour</w>! <w>Comment</w> <w>ça</w> <w>va</w>?</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
# print(etree.tounicode(tokenized))
self.assertEqual(etree.tounicode(tokenized), ref)

Expand All @@ -55,7 +59,8 @@ def test_mixed_words(self):
<s xml:lang="atj">Tan e ici matisihin?</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
# print(etree.tounicode(tokenized))
self.assertEqual(etree.tounicode(tokenized), ref)

Expand All @@ -73,7 +78,8 @@ def test_comments(self):
<s xml:lang="atj"><w>Tan</w> <w>e</w> <w>ici</w> <w>matisihin</w>?</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
# print(etree.tounicode(tokenized))
self.assertEqual(etree.tounicode(tokenized), ref)

Expand Down
Loading

0 comments on commit 806f50c

Please sign in to comment.