From 6bf66739968081140210835823f156c0ff91615b Mon Sep 17 00:00:00 2001
From: David Marx <david.marx84@gmail.com>
Date: Sat, 8 Oct 2022 17:03:36 -0700
Subject: [PATCH] Fewer timedeltas (#88)

* drop unnecessary timedelta stuff

* sigh

* dealt with tds

* handle tds from asr

* sigh
---
 Video_Killed_The_Radio_Star_Defusion.ipynb | 49 ++++------------------
 vktrs/asr.py                               |  8 +---
 2 files changed, 9 insertions(+), 48 deletions(-)

diff --git a/Video_Killed_The_Radio_Star_Defusion.ipynb b/Video_Killed_The_Radio_Star_Defusion.ipynb
index e4e13b0..6f614db 100644
--- a/Video_Killed_The_Radio_Star_Defusion.ipynb
+++ b/Video_Killed_The_Radio_Star_Defusion.ipynb
@@ -537,12 +537,13 @@
         "    ### checkpoint the processing work we've done to this point\n",
         "\n",
         "    prompt_starts_copy = copy.deepcopy(prompt_starts)\n",
-        "\n",
+        "    \n",
+        "    # to do: deal with timedeltas in asr.py and yt.py\n",
         "    for rec in prompt_starts_copy:\n",
         "        for k,v in list(rec.items()):\n",
         "            if isinstance(v, dt.timedelta):\n",
         "                rec[k] = v.total_seconds()\n",
-        "\n",
+        "    \n",
         "    storyboard.prompt_starts = prompt_starts_copy\n",
         "\n",
         "    with open(storyboard_fname) as fp:\n",
@@ -567,7 +568,7 @@
         ")\n",
         "\n",
         "if 'td' in df:\n",
-        "  del df['td']\n",
+        "    del df['td']\n",
         "\n",
         "df_pre = copy.deepcopy(df)\n",
         "pn.widgets.Tabulator(df, formatters=tabulator_formatters)\n"
@@ -635,7 +636,6 @@
         "        df_pre = copy.deepcopy(df)\n",
         "        for i, rec in enumerate(prompt_starts):\n",
         "            rec['ts'] = float(df.loc[i,'Timestamp (sec)'])\n",
-        "            rec['td'] = rec['ts']\n",
         "            rec['prompt'] = df.loc[i,'Lyric']\n",
         "        \n",
         "        # ...actually, I think the above code might not do anything\n",
@@ -802,13 +802,6 @@
         "##############\n",
         "\n",
         "prompt_starts_copy = copy.deepcopy(prompt_starts)\n",
-        "\n",
-        "# deal with timedeltas\n",
-        "for rec in prompt_starts_copy:\n",
-        "    for k,v in list(rec.items()):\n",
-        "        if isinstance(v, dt.timedelta):\n",
-        "            rec[k] = v.total_seconds()\n",
-        "\n",
         "storyboard.prompt_starts = prompt_starts_copy\n",
         "\n",
         "with open(storyboard_fname) as fp:\n",
@@ -881,12 +874,6 @@
         "    ##########################\n",
         "    prompt_starts_copy = copy.deepcopy(prompt_starts)\n",
         "\n",
-        "    # to do: this should be rendered unnecessary before this branch is merged\n",
-        "    for rec in prompt_starts_copy:\n",
-        "        for k,v in list(rec.items()):\n",
-        "            if isinstance(v, dt.timedelta):\n",
-        "                rec[k] = v.total_seconds()\n",
-        "\n",
         "    storyboard.prompt_starts = prompt_starts_copy\n",
         "\n",
         "    with open(storyboard_fname) as fp:\n",
@@ -903,29 +890,26 @@
         "\n",
         "# to do: \n",
         "# * make this more portable and add to vktrs lib\n",
-        "# * don't write timedelta objects into the prompt_starts... yeesh\n",
         "\n",
         "fps = 12 # @param {type:'integer'}\n",
         "storyboard.params.fps = fps\n",
         "\n",
-        "ifps = dt.timedelta(seconds=1/fps)\n",
+        "ifps = 1/fps\n",
         "\n",
         "# estimate video end\n",
         "video_duration = storyboard.params['video_duration']\n",
         "\n",
         "# dummy prompt for last scene duration\n",
         "prompt_starts = OmegaConf.to_container(storyboard.prompt_starts)\n",
-        "for rec in prompt_starts:\n",
-        "    rec['td'] = dt.timedelta(seconds=rec['td'])\n",
-        "prompt_starts.append({'td':dt.timedelta(seconds=video_duration)})\n",
+        "prompt_starts.append({'ts':video_duration})\n",
         "\n",
         "# make sure we respect the duration of the previous phrase\n",
-        "frame_start=dt.timedelta(seconds=0)\n",
+        "frame_start=0\n",
         "prompt_starts[0]['anim_start']=frame_start\n",
         "for i, rec in enumerate(prompt_starts[1:], start=1):\n",
         "    rec_prev = prompt_starts[i-1]\n",
         "    k=0\n",
-        "    while (rec_prev['anim_start'] + k*ifps) < rec['td']:\n",
+        "    while (rec_prev['anim_start'] + k*ifps) < rec['ts']:\n",
         "        k+=1\n",
         "    k-=1\n",
         "    rec_prev['frames'] = k\n",
@@ -933,12 +917,6 @@
         "    frame_start+=k*ifps\n",
         "    rec['anim_start']=frame_start\n",
         "\n",
-        "# make sure we respect the duration of the previous phrase\n",
-        "# to do: push end time into a timedelta and consider it... somewhere near here\n",
-        "for i, rec1 in enumerate(prompt_starts):\n",
-        "    rec0 = prompt_starts[i-1]\n",
-        "    rec0['duration'] = rec1['td'] - rec0['td']\n",
-        "\n",
         "# drop the dummy frame\n",
         "prompt_starts = prompt_starts[:-1]\n",
         "\n",
@@ -952,11 +930,6 @@
         "\n",
         "prompt_starts_copy = copy.deepcopy(prompt_starts)\n",
         "\n",
-        "for rec in prompt_starts_copy:\n",
-        "    for k,v in list(rec.items()):\n",
-        "        if isinstance(v, dt.timedelta):\n",
-        "            rec[k] = v.total_seconds()\n",
-        "\n",
         "storyboard.prompt_starts = prompt_starts_copy\n",
         "\n",
         "with open(storyboard_fname) as fp:\n",
@@ -1034,12 +1007,6 @@
         "\n",
         "prompt_starts_copy = copy.deepcopy(prompt_starts)\n",
         "\n",
-        "# to do: this should be rendered unnecessary before this branch is merged\n",
-        "for rec in prompt_starts_copy:\n",
-        "    for k,v in list(rec.items()):\n",
-        "        if isinstance(v, dt.timedelta):\n",
-        "            rec[k] = v.total_seconds()\n",
-        "\n",
         "storyboard.prompt_starts = prompt_starts_copy\n",
         "\n",
         "# to do: deal with these td objects\n",
diff --git a/vktrs/asr.py b/vktrs/asr.py
index 2eeefa4..112e8d3 100644
--- a/vktrs/asr.py
+++ b/vktrs/asr.py
@@ -119,17 +119,14 @@ def whisper_segment_transcription(
     apply whisper-tiny segmentations to whisper-large transcriptions
 
     """
-
     token_large_phrase_segmentations = []
     start_prev = 0
     end_prev=0
     current_phrase = []
     for rec in token_large_index_segmentations.values():
-        #print(current_phrase)
-        #print(start_prev, end_prev, rec)
+
         # we're in the same phrase as previous step
         if rec['start'] == start_prev:
-            #print("still in phrase")
             current_phrase.append(rec['token'])
             start_prev = rec['start']
             end_prev = rec.get('end')
@@ -145,7 +142,6 @@ def whisper_segment_transcription(
 
         # ...which starts immediately after the previous phrase
         if rec['start'] == end_prev:
-            #print("new starts where expected")
             current_phrase.append(rec['token'])
             start_prev = rec['start']
             end_prev = rec['end']
@@ -156,7 +152,6 @@ def whisper_segment_transcription(
         else:
             #raise NotImplementedError
             # let's just do.. this? for now? I guess?
-            #print("ruh roh")
             current_phrase.append(rec['token'])
             start_prev = rec['start']
             end_prev = rec['end']
@@ -173,7 +168,6 @@ def whisper_segment_transcription(
     # reshape the data structure
     prompt_starts = [
             {'ts':rec['start'],
-            'td':dt.timedelta(seconds=rec['start']),
             'prompt':' '.join(rec['tokens'])
             }
         for rec in token_large_phrase_segmentations]