From 6bf66739968081140210835823f156c0ff91615b Mon Sep 17 00:00:00 2001 From: David Marx Date: Sat, 8 Oct 2022 17:03:36 -0700 Subject: [PATCH] Fewer timedeltas (#88) * drop unnecessary timedelta stuff * sigh * dealt with td's * handle tds from asr * sigh --- Video_Killed_The_Radio_Star_Defusion.ipynb | 49 ++++------------------ vktrs/asr.py | 8 +--- 2 files changed, 9 insertions(+), 48 deletions(-) diff --git a/Video_Killed_The_Radio_Star_Defusion.ipynb b/Video_Killed_The_Radio_Star_Defusion.ipynb index e4e13b0..6f614db 100644 --- a/Video_Killed_The_Radio_Star_Defusion.ipynb +++ b/Video_Killed_The_Radio_Star_Defusion.ipynb @@ -537,12 +537,13 @@ " ### checkpoint the processing work we've done to this point\n", "\n", " prompt_starts_copy = copy.deepcopy(prompt_starts)\n", - "\n", + " \n", + " # to do: deal with timedeltas in asr.py and yt.py\n", " for rec in prompt_starts_copy:\n", " for k,v in list(rec.items()):\n", " if isinstance(v, dt.timedelta):\n", " rec[k] = v.total_seconds()\n", - "\n", + " \n", " storyboard.prompt_starts = prompt_starts_copy\n", "\n", " with open(storyboard_fname) as fp:\n", @@ -567,7 +568,7 @@ ")\n", "\n", "if 'td' in df:\n", - " del df['td']\n", + " del df['td']\n", "\n", "df_pre = copy.deepcopy(df)\n", "pn.widgets.Tabulator(df, formatters=tabulator_formatters)\n" @@ -635,7 +636,6 @@ " df_pre = copy.deepcopy(df)\n", " for i, rec in enumerate(prompt_starts):\n", " rec['ts'] = float(df.loc[i,'Timestamp (sec)'])\n", - " rec['td'] = rec['ts']\n", " rec['prompt'] = df.loc[i,'Lyric']\n", " \n", " # ...actually, I think the above code might not do anything\n", @@ -802,13 +802,6 @@ "##############\n", "\n", "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", - "\n", - "# deal with timedeltas\n", - "for rec in prompt_starts_copy:\n", - " for k,v in list(rec.items()):\n", - " if isinstance(v, dt.timedelta):\n", - " rec[k] = v.total_seconds()\n", - "\n", "storyboard.prompt_starts = prompt_starts_copy\n", "\n", "with open(storyboard_fname) as fp:\n", @@ -881,12 +874,6 @@ " ##########################\n", " prompt_starts_copy = copy.deepcopy(prompt_starts)\n", "\n", - " # to do: this should be rendered unnecessary before this branch is merged\n", - " for rec in prompt_starts_copy:\n", - " for k,v in list(rec.items()):\n", - " if isinstance(v, dt.timedelta):\n", - " rec[k] = v.total_seconds()\n", - "\n", " storyboard.prompt_starts = prompt_starts_copy\n", "\n", " with open(storyboard_fname) as fp:\n", @@ -903,29 +890,26 @@ "\n", "# to do: \n", "# * make this more portable and add to vktrs lib\n", - "# * don't write timedelta objects into the prompt_starts... yeesh\n", "\n", "fps = 12 # @param {type:'integer'}\n", "storyboard.params.fps = fps\n", "\n", - "ifps = dt.timedelta(seconds=1/fps)\n", + "ifps = 1/fps\n", "\n", "# estimate video end\n", "video_duration = storyboard.params['video_duration']\n", "\n", "# dummy prompt for last scene duration\n", "prompt_starts = OmegaConf.to_container(storyboard.prompt_starts)\n", - "for rec in prompt_starts:\n", - " rec['td'] = dt.timedelta(seconds=rec['td'])\n", - "prompt_starts.append({'td':dt.timedelta(seconds=video_duration)})\n", + "prompt_starts.append({'ts':video_duration})\n", "\n", "# make sure we respect the duration of the previous phrase\n", - "frame_start=dt.timedelta(seconds=0)\n", + "frame_start=0\n", "prompt_starts[0]['anim_start']=frame_start\n", "for i, rec in enumerate(prompt_starts[1:], start=1):\n", " rec_prev = prompt_starts[i-1]\n", " k=0\n", - " while (rec_prev['anim_start'] + k*ifps) < rec['td']:\n", + " while (rec_prev['anim_start'] + k*ifps) < rec['ts']:\n", " k+=1\n", " k-=1\n", " rec_prev['frames'] = k\n", @@ -933,12 +917,6 @@ " frame_start+=k*ifps\n", " rec['anim_start']=frame_start\n", "\n", - "# make sure we respect the duration of the previous phrase\n", - "# to do: push end time into a timedelta and consider it... somewhere near here\n", - "for i, rec1 in enumerate(prompt_starts):\n", - " rec0 = prompt_starts[i-1]\n", - " rec0['duration'] = rec1['td'] - rec0['td']\n", - "\n", "# drop the dummy frame\n", "prompt_starts = prompt_starts[:-1]\n", "\n", @@ -952,11 +930,6 @@ "\n", "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", "\n", - "for rec in prompt_starts_copy:\n", - " for k,v in list(rec.items()):\n", - " if isinstance(v, dt.timedelta):\n", - " rec[k] = v.total_seconds()\n", - "\n", "storyboard.prompt_starts = prompt_starts_copy\n", "\n", "with open(storyboard_fname) as fp:\n", @@ -1034,12 +1007,6 @@ "\n", "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", "\n", - "# to do: this should be rendered unnecessary before this branch is merged\n", - "for rec in prompt_starts_copy:\n", - " for k,v in list(rec.items()):\n", - " if isinstance(v, dt.timedelta):\n", - " rec[k] = v.total_seconds()\n", - "\n", "storyboard.prompt_starts = prompt_starts_copy\n", "\n", "# to do: deal with these td objects\n", diff --git a/vktrs/asr.py b/vktrs/asr.py index 2eeefa4..112e8d3 100644 --- a/vktrs/asr.py +++ b/vktrs/asr.py @@ -119,17 +119,14 @@ def whisper_segment_transcription( apply whisper-tiny segmentations to whisper-large transcriptions """ - token_large_phrase_segmentations = [] start_prev = 0 end_prev=0 current_phrase = [] for rec in token_large_index_segmentations.values(): - #print(current_phrase) - #print(start_prev, end_prev, rec) + # we're in the same phrase as previous step if rec['start'] == start_prev: - #print("still in phrase") current_phrase.append(rec['token']) start_prev = rec['start'] end_prev = rec.get('end') @@ -145,7 +142,6 @@ def whisper_segment_transcription( # ...which starts immediately after the previous phrase if rec['start'] == end_prev: - #print("new starts where expected") current_phrase.append(rec['token']) start_prev = rec['start'] end_prev = rec['end'] @@ -156,7 +152,6 @@ def whisper_segment_transcription( else: #raise NotImplementedError # let's just do.. this? for now? I guess? - #print("ruh roh") current_phrase.append(rec['token']) start_prev = rec['start'] end_prev = rec['end'] @@ -173,7 +168,6 @@ def whisper_segment_transcription( # reshape the data structure prompt_starts = [ {'ts':rec['start'], - 'td':dt.timedelta(seconds=rec['start']), 'prompt':' '.join(rec['tokens']) } for rec in token_large_phrase_segmentations]