Skip to content

Commit

Permalink
add prefix for tts corpus process
Browse files (browse the repository at this point in the history)
  • Loading branch information
JackTemaki committed Dec 6, 2024
1 parent 3136606 commit 1454d51
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions users/rossenbach/setups/tts/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def extend_lexicon_with_blank(lexicon: tk.Path) -> tk.Path:
return lexicon


def process_corpus_text_with_extended_lexicon(bliss_corpus: tk.Path, lexicon: tk.Path) -> tk.Path:
def process_corpus_text_with_extended_lexicon(bliss_corpus: tk.Path, lexicon: tk.Path, prefix=None) -> tk.Path:
"""
Apply the lexicon to a corpus file, and insert [start], [end] and [space] tokens.
Expand All @@ -86,5 +86,9 @@ def process_corpus_text_with_extended_lexicon(bliss_corpus: tk.Path, lexicon: tk
add_end_command]).out
processed_bliss_corpus = CorpusReplaceOrthFromTxtJob(bliss_corpus, tokenized_text).out_corpus

converted_bliss_corpus = ApplyLexiconToCorpusJob(processed_bliss_corpus, lexicon, word_separation_orth="[space]").out_corpus
converted_bliss_corpus_job = ApplyLexiconToCorpusJob(processed_bliss_corpus, lexicon, word_separation_orth="[space]")
converted_bliss_corpus = converted_bliss_corpus_job.out_corpus
if prefix is not None:
converted_bliss_corpus_job.add_alias(prefix + "/apply_lexicon")

return converted_bliss_corpus

0 comments on commit 1454d51

Please sign in to comment.