Skip to content

Commit

Permalink
More timedeltas (dmarx#89)
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmarx authored Oct 9, 2022
1 parent 6bf6673 commit f52fb8f
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 9 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.5
0.1.6
3 changes: 1 addition & 2 deletions vktrs/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
pip install git+https://github.com/openai/whisper
"""

import datetime as dt
import time

import tokenizations
from vktrs.utils import remove_punctuation
import whisper


def whisper_transcribe(
audio_fpath="audio.mp3",
):
Expand Down Expand Up @@ -175,7 +175,6 @@ def whisper_segment_transcription(
return prompt_starts



def whisper_lyrics(audio_fpath="audio.mp3"):
whispers = whisper_transcribe(audio_fpath)
tiny2large, large2tiny, whispers_tokens = whisper_align(whispers)
Expand Down
11 changes: 5 additions & 6 deletions vktrs/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def parse_timestamp(ts):
minutes=t.minute,
seconds=t.second,
microseconds=t.microsecond,
)
).total_seconds()

def vtt_to_token_timestamps(captions):

Expand All @@ -76,9 +76,9 @@ def vtt_to_token_timestamps(captions):
token_start_times = []
for line in all_word_starts_raw:
starts_ = [
{'ts':hit[1],
{'ts_str':hit[1],
'tok':hit[3].strip(),
'td':parse_timestamp(hit[1])
'ts':parse_timestamp(hit[1])
}
for hit in re.findall(pat, line)]
token_start_times.extend(starts_)
Expand All @@ -89,10 +89,9 @@ def srv2_to_token_timestamps(srv2_xml):
srv2_soup = BeautifulSoup(srv2_xml, 'xml')
return [
{
'ts':e['t'],
'ts_str':e['t'],
'tok':e.text,
#'td':dt.timedelta(microseconds=int(e['t']))
'td':dt.timedelta(milliseconds=int(e['t']))
'ts':dt.timedelta(milliseconds=int(e['t'])).total_seconds()
}
for e in srv2_soup.find_all('text')
if e.text.strip()
Expand Down

0 comments on commit f52fb8f

Please sign in to comment.