From f52fb8f0e09607c9c9d913c33706e7885758b98b Mon Sep 17 00:00:00 2001
From: David Marx
Date: Sat, 8 Oct 2022 17:28:16 -0700
Subject: [PATCH] More timedeltas (#89)

---
 VERSION          |  2 +-
 vktrs/asr.py     |  3 +--
 vktrs/youtube.py | 11 +++++------
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/VERSION b/VERSION
index 9faa1b7..c946ee6 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.5
+0.1.6
diff --git a/vktrs/asr.py b/vktrs/asr.py
index 112e8d3..429168c 100644
--- a/vktrs/asr.py
+++ b/vktrs/asr.py
@@ -5,13 +5,13 @@
 pip install git+https://github.com/openai/whisper
 """
 
-import datetime as dt
 import time
 
 import tokenizations
 from vktrs.utils import remove_punctuation
 import whisper
 
+
 def whisper_transcribe(
     audio_fpath="audio.mp3",
 ):
@@ -175,7 +175,6 @@ def whisper_segment_transcription(
 
     return prompt_starts
 
-
 def whisper_lyrics(audio_fpath="audio.mp3"):
     whispers = whisper_transcribe(audio_fpath)
     tiny2large, large2tiny, whispers_tokens = whisper_align(whispers)
diff --git a/vktrs/youtube.py b/vktrs/youtube.py
index 4b00482..64dc0b8 100644
--- a/vktrs/youtube.py
+++ b/vktrs/youtube.py
@@ -53,7 +53,7 @@ def parse_timestamp(ts):
         minutes=t.minute,
         seconds=t.second,
         microseconds=t.microsecond,
-    )
+    ).total_seconds()
 
 
 def vtt_to_token_timestamps(captions):
@@ -76,9 +76,9 @@ def vtt_to_token_timestamps(captions):
     token_start_times = []
     for line in all_word_starts_raw:
         starts_ = [
-            {'ts':hit[1],
+            {'ts_str':hit[1],
             'tok':hit[3].strip(),
-            'td':parse_timestamp(hit[1])
+            'ts':parse_timestamp(hit[1])
            } for hit in re.findall(pat, line)]
        token_start_times.extend(starts_)
 
@@ -89,10 +89,9 @@ def srv2_to_token_timestamps(srv2_xml):
     srv2_soup = BeautifulSoup(srv2_xml, 'xml')
     return [
         {
-            'ts':e['t'],
+            'ts_str':e['t'],
            'tok':e.text,
-            #'td':dt.timedelta(microseconds=int(e['t']))
-            'td':dt.timedelta(milliseconds=int(e['t']))
+            'ts':dt.timedelta(milliseconds=int(e['t'])).total_seconds()
        }
        for e in srv2_soup.find_all('text')
        if e.text.strip()
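
Note (illustration only, not part of the patch): the hunks above replace datetime.timedelta values with plain float seconds via .total_seconds() and rename the raw-string timestamp field to 'ts_str', so that 'ts' always carries the numeric value. A minimal sketch of the two conversions, assuming an "HH:MM:SS.fff" caption timestamp and a millisecond-valued srv2 't' attribute; the helper names below are hypothetical stand-ins, not functions from vktrs.

    import datetime as dt

    # srv2 't' attributes are millisecond offsets; the patched code converts
    # them to float seconds with timedelta.total_seconds().
    def srv2_ms_to_seconds(t_attr: str) -> float:
        return dt.timedelta(milliseconds=int(t_attr)).total_seconds()

    # Assumed stand-in for parse_timestamp(): parse "HH:MM:SS.fff" and
    # return float seconds, mirroring the .total_seconds() change above.
    def hhmmss_to_seconds(ts: str) -> float:
        t = dt.datetime.strptime(ts, "%H:%M:%S.%f")
        return dt.timedelta(
            hours=t.hour,
            minutes=t.minute,
            seconds=t.second,
            microseconds=t.microsecond,
        ).total_seconds()

    print(srv2_ms_to_seconds("1500"))         # 1.5
    print(hhmmss_to_seconds("00:01:02.500"))  # 62.5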