diff --git a/Video_Killed_The_Radio_Star_Defusion.ipynb b/Video_Killed_The_Radio_Star_Defusion.ipynb index 1af7c84..f568367 100644 --- a/Video_Killed_The_Radio_Star_Defusion.ipynb +++ b/Video_Killed_The_Radio_Star_Defusion.ipynb @@ -6,7 +6,7 @@ "id": "mgXxoDhMAiti" }, "source": [ - "# $ \\text{Video Killed The Radio Star}$ $...\\Diffusion$\n", + "# $ \\text{Video Killed The Radio Star}$ $\\color{red}{...Diffusion}$\n", "\n", "Notebook by David Marx ([@DigThatData](https://twitter.com/digthatdata))\n", "\n", @@ -67,16 +67,55 @@ }, { "cell_type": "markdown", - "source": [ - "## $0.$ Setup" - ], "metadata": { "id": "sM147HP4kAdY" - } + }, + "source": [ + "## $0.$ Setup" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ZnTe8clZuZuj" + }, + "outputs": [], + "source": [ + "# @title # 📊 Check GPU Status\n", + "\n", + "try:\n", + " from vktrs.utils import gpu_info\n", + "except:\n", + " import pandas as pd\n", + " import subprocess\n", + " \n", + " def gpu_info():\n", + " outv = subprocess.run([\n", + " 'nvidia-smi',\n", + " # these lines concatenate into a single query string\n", + " '--query-gpu='\n", + " 'timestamp,'\n", + " 'name,'\n", + " 'utilization.gpu,'\n", + " 'utilization.memory,'\n", + " 'memory.used,'\n", + " 'memory.free,'\n", + " ,\n", + " '--format=csv'\n", + " ],\n", + " stdout=subprocess.PIPE).stdout.decode('utf-8')\n", + "\n", + " header, rec = outv.split('\\n')[:-1]\n", + " return pd.DataFrame({' '.join(k.strip().split('.')).capitalize():v for k,v in zip(header.split(','), rec.split(','))}, index=[0]).T\n", + "\n", + "gpu_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, "metadata": { "cellView": "form", "id": "oPbeyWtesAoh" @@ -98,22 +137,6 @@ "!pip install panel" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "ZnTe8clZuZuj" - }, - "outputs": [], - "source": [ - "# @title # 📊 Check GPU Status\n", - "\n", - "from vktrs.utils import gpu_info\n", - "\n", - "gpu_info()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -140,10 +163,14 @@ "# @markdown \n", "\n", "use_stability_api = False # @param {type:'boolean'}\n", - "mount_gdrive = False # @param {type:'boolean'}\n", + "mount_gdrive = True # @param {type:'boolean'}\n", "\n", "import os\n", "from pathlib import Path\n", + "import time\n", + "\n", + "from omegaconf import OmegaConf\n", + "\n", "\n", "os.environ['XDG_CACHE_HOME'] = os.environ.get(\n", " 'XDG_CACHE_HOME',\n", @@ -160,34 +187,12 @@ " # Following line will be sufficient pending merge of https://github.com/openai/whisper/pull/257\n", " os.environ['XDG_CACHE_HOME']='/content/drive/MyDrive/AI/models/.cache'\n", "\n", - "if use_stability_api:\n", - " import os, getpass\n", - " os.environ['STABILITY_KEY'] = getpass.getpass('Enter your API Key')\n", - "else:\n", - " try:\n", - " from google.colab import output\n", - " output.enable_custom_widget_manager()\n", - " except ImportError:\n", - " # assume local use\n", - " pass\n", - " \n", - " from huggingface_hub import notebook_login\n", - "\n", - " # to do: if gdrive mounted, check for API token... 
somewhere on drive?\n", - " # looks like we should be able to find the token through an environment variable\n", - " notebook_login()\n", - "\n", - "from omegaconf import OmegaConf\n", - "from pathlib import Path\n", - "\n", "model_dir_str=str(Path(os.environ['XDG_CACHE_HOME']))\n", "proj_root_str = '${active_project}'\n", "if mount_gdrive:\n", " proj_root_str = '/content/drive/MyDrive/AI/VideoKilledTheRadioStar/${active_project}'\n", "\n", "\n", - "import time\n", - "\n", "# notebook config\n", "cfg = OmegaConf.create({\n", " 'active_project':str(time.time()),\n", @@ -199,41 +204,72 @@ "})\n", "\n", "with open('config.yaml','w') as fp:\n", - " OmegaConf.save(config=cfg, f=fp.name)" + " OmegaConf.save(config=cfg, f=fp.name)\n", + "\n", + "###################\n", + "\n", + "if use_stability_api:\n", + " import os, getpass\n", + " os.environ['STABILITY_KEY'] = getpass.getpass('Enter your API Key')\n", + "else:\n", + " try:\n", + " from google.colab import output\n", + " output.enable_custom_widget_manager()\n", + " except ImportError:\n", + " # assume local use\n", + " pass\n", + " \n", + " from huggingface_hub import notebook_login\n", + "\n", + " # to do: if gdrive mounted, check for API token... somewhere on drive?\n", + " # looks like we should be able to find the token through an environment variable\n", + " notebook_login()\n" ] }, { "cell_type": "markdown", - "source": [ - "## $1.$ Create New Project" - ], "metadata": { "id": "rt9Mu97fk_bp" - } + }, + "source": [ + "## $1.$ 📋 Create New Project" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "cellView": "form", "id": "s-9xjgy0iHhS" }, "outputs": [], "source": [ - "# @title ## 📋 Set Project Name\n", - "\n", "# @markdown To create a new project, enter a unique project name.\n", "# @markdown If you leave `project_name` blank, the current unix timestamp will be used\n", "# @markdown (seconds since 1970-01-01 00:00).\n", "\n", "# @markdown If you use the name of an existing project, the workspace will switch to that project.\n", "\n", + "# @markdown Non-alphanumeric characters (excluding '-' and '_') will be replaced with hyphens.\n", + "\n", "import time\n", "\n", "project_name = '' # @param {type:'string'}\n", "if not project_name:\n", " project_name = str(time.time())\n", "\n", + "import string\n", + "def sanitize_folder_name(fp):\n", + " outv = ''\n", + " whitelist = string.ascii_letters + string.digits + '-_'\n", + " for token in str(fp):\n", + " if token not in whitelist:\n", + " token = '-'\n", + " outv += token\n", + " return outv\n", + "\n", + "project_name = sanitize_folder_name(project_name)\n", + "\n", "workspace = OmegaConf.load('config.yaml')\n", "workspace.active_project = project_name\n", "\n", @@ -243,12 +279,12 @@ }, { "cell_type": "markdown", - "source": [ - "## $2.$ Audio processing" - ], "metadata": { "id": "7eTPNhcBomtL" - } + }, + "source": [ + "## $2.$ 💬 Parse speech from audio" + ] }, { "cell_type": "code", @@ -259,8 +295,6 @@ }, "outputs": [], "source": [ - "# @title 📋 Parameters\n", - "\n", "from omegaconf import OmegaConf\n", "from pathlib import Path\n", "\n", @@ -273,11 +307,12 @@ "root.mkdir(parents=True, exist_ok=True)\n", "\n", "\n", + "import copy\n", "import datetime as dt\n", + "import gc\n", "from itertools import chain, cycle\n", "import json\n", "import os\n", - "\n", "import re\n", "import string\n", "from subprocess import Popen, PIPE\n", @@ -287,11 +322,22 @@ "\n", "from IPython.display import display\n", "import numpy as np\n", + "import pandas as 
pd\n", + "import panel as pn\n", "from tqdm.autonotebook import tqdm\n", "\n", "import tokenizations\n", "import webvtt\n", + "import whisper\n", "\n", + "from vktrs.utils import remove_punctuation\n", + "from vktrs.utils import get_audio_duration_seconds\n", + "from vktrs.youtube import (\n", + " YoutubeHelper,\n", + " parse_timestamp,\n", + " vtt_to_token_timestamps,\n", + " srv2_to_token_timestamps,\n", + ")\n", "\n", "storyboard = OmegaConf.create()\n", "\n", @@ -308,7 +354,6 @@ " #, use_stability_api = use_stability_api\n", ")\n", "\n", - "\n", "if not storyboard.params.audio_fpath:\n", " storyboard.params.audio_fpath = None\n", "\n", @@ -324,38 +369,12 @@ "storyboard_fname = root / 'storyboard.yaml'\n", "with open(storyboard_fname,'wb') as fp:\n", " OmegaConf.save(config=storyboard, f=fp.name)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "8OaQYVfYgBH-" - }, - "outputs": [], - "source": [ - "%%capture\n", - "\n", - "# @title ## 📥 Download audio from youtube\n", "\n", - "from vktrs.utils import get_audio_duration_seconds\n", - "from vktrs.youtube import (\n", - " YoutubeHelper,\n", - " parse_timestamp,\n", - " vtt_to_token_timestamps,\n", - " srv2_to_token_timestamps,\n", - ")\n", "\n", - "from omegaconf import OmegaConf\n", - "from pathlib import Path\n", + "##############################################\n", "\n", - "workspace = OmegaConf.load('config.yaml')\n", - "root = Path(workspace.project_root)\n", + "## 📥 Download audio from youtube\n", "\n", - "storyboard_fname = root / 'storyboard.yaml'\n", - "storyboard = OmegaConf.load(storyboard_fname)\n", "\n", "video_url = storyboard.params.video_url\n", "\n", @@ -419,19 +438,11 @@ "with open(storyboard_fname,'wb') as fp:\n", " OmegaConf.save(config=storyboard, f=fp.name)\n", "\n", - "whisper_seg = storyboard.params.whisper_seg" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "73lfb0gZvGW5" - }, - "outputs": [], - "source": [ - "# @title ## 💬 Transcribe and segment speech using whisper\n", + "whisper_seg = storyboard.params.whisper_seg\n", + "\n", + "#############################################\n", + "\n", + "## 💬 Transcribe and segment speech using whisper\n", "\n", "# handle OOM... 
or try to, anyway\n", "if 'hf_helper' in locals():\n", @@ -439,22 +450,6 @@ " del hf_helper.text2img\n", " del hf_helper\n", "\n", - "import gc\n", - "from omegaconf import OmegaConf\n", - "from pathlib import Path\n", - "import time\n", - "\n", - "import tokenizations\n", - "from vktrs.utils import remove_punctuation\n", - "import whisper\n", - "\n", - "workspace = OmegaConf.load('config.yaml')\n", - "root = Path(workspace.project_root)\n", - "\n", - "storyboard_fname = root / 'storyboard.yaml'\n", - "storyboard = OmegaConf.load(storyboard_fname)\n", - "\n", - "whisper_seg = storyboard.params.whisper_seg\n", "\n", "if whisper_seg:\n", " from vktrs.asr import (\n", @@ -547,24 +542,29 @@ " token_large_index_segmentations,\n", " )\n", "\n", + "\n", + " ### checkpoint the processing work we've done to this point\n", + "\n", + " prompt_starts_copy = copy.deepcopy(prompt_starts)\n", + "\n", + " for rec in prompt_starts_copy:\n", + " for k,v in list(rec.items()):\n", + " if isinstance(v, dt.timedelta):\n", + " rec[k] = v.total_seconds()\n", + "\n", + " storyboard.prompt_starts = prompt_starts_copy\n", + "\n", " with open(storyboard_fname) as fp:\n", " OmegaConf.save(config=storyboard, f=fp.name)\n", "\n", "######################################################\n", + "# Review/Modify transcription\n", "\n", - "# title # 4.b (optional) Review/Modify transcription\n", - "\n", - "# markdown Run this cell for an opportunity to review and modify the\n", - "# markdown transcription.\n", - "\n", - "import pandas as pd\n", - "import panel as pn\n", "\n", "# https://panel.holoviz.org/reference/widgets/Tabulator.html\n", "pn.extension('tabulator') # I don't know that specifying 'tabulator' here is even necessary...\n", "\n", "tabulator_formatters = {\n", - " #'float': {'type': 'progress', 'max': 10},\n", " 'bool': {'type': 'tickCross'}\n", "}\n", "\n", @@ -578,124 +578,19 @@ "if 'td' in df:\n", " del df['td']\n", "\n", - "import copy\n", "df_pre = copy.deepcopy(df)\n", - "pn.widgets.Tabulator(df, formatters=tabulator_formatters)" + "pn.widgets.Tabulator(df, formatters=tabulator_formatters)\n" ] }, { "cell_type": "markdown", - "source": [ - "## $3.$🧮 Math" - ], "metadata": { - "id": "mkPO7_cVpRU3" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "_WH4yvk5_UDp" + "id": "_RTUFeyQqCfd" }, - "outputs": [], "source": [ - "# to do: move this to video compilation\n", - "\n", - "import datetime as dt\n", - "\n", - "# update prompt_starts if any changes were made above\n", - "import numpy as np\n", - "if not np.all(df_pre.values == df.values):\n", - " df_pre = copy.deepcopy(df)\n", - " for i, rec in enumerate(prompt_starts):\n", - " rec['ts'] = df.loc[i,'Timestamp (sec)']\n", - " rec['td'] = dt.timedelta(rec['ts'])\n", - " rec['prompt'] = df.loc[i,'Lyric']\n", - "\n", - "############################################\n", - "\n", - "workspace = OmegaConf.load('config.yaml')\n", - "#OmegaConf.resolve(workspace)\n", - "root = Path(workspace.project_root)\n", - "\n", - "storyboard_fname = root / 'storyboard.yaml'\n", - "storyboard = OmegaConf.load(storyboard_fname)\n", - "\n", - "### This cell computes how many frames are needed for each segment\n", - "### based on the start times for each prompt\n", - "\n", - "import datetime as dt\n", - "#fps = storyboard.params.fps\n", - "\n", - "\n", - "# @markdown `fps` - Frames-per-second of generated animations\n", - "\n", - "fps = 12 # @param {type:'integer'}\n", - "storyboard.params.fps = fps\n", - "\n", - "ifps = 
dt.timedelta(seconds=1/fps)\n", - "\n", - "# estimate video end\n", - "video_duration = storyboard.params['video_duration']\n", - "\n", - "# dummy prompt for last scene duration\n", - "prompt_starts.append({'td':dt.timedelta(seconds=video_duration)})\n", - "\n", - "# make sure we respect the duration of the previous phrase\n", - "frame_start=dt.timedelta(seconds=0)\n", - "prompt_starts[0]['anim_start']=frame_start\n", - "for i, rec in enumerate(prompt_starts[1:], start=1):\n", - " rec_prev = prompt_starts[i-1]\n", - " k=0\n", - " while (rec_prev['anim_start'] + k*ifps) < rec['td']:\n", - " k+=1\n", - " k-=1\n", - " rec_prev['frames'] = k\n", - " rec_prev['anim_duration'] = k*ifps\n", - " frame_start+=k*ifps\n", - " rec['anim_start']=frame_start\n", - "\n", - "# make sure we respect the duration of the previous phrase\n", - "# to do: push end time into a timedelta and consider it... somewhere near here\n", - "for i, rec1 in enumerate(prompt_starts):\n", - " rec0 = prompt_starts[i-1]\n", - " rec0['duration'] = rec1['td'] - rec0['td']\n", - "\n", - "# drop the dummy frame\n", - "prompt_starts = prompt_starts[:-1]\n", - "\n", - "# to do: given a 0 duration prompt, assume its duration is captured in the next prompt \n", - "# and guesstimate a corrected prompt start time and duration \n", - "\n", - "\n", - "### checkpoint the processing work we've done to this point\n", - "\n", - "import copy\n", - "\n", - "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", - "\n", - "for rec in prompt_starts_copy:\n", - " for k,v in list(rec.items()):\n", - " if isinstance(v, dt.timedelta):\n", - " rec[k] = v.total_seconds()\n", - "\n", - "storyboard.prompt_starts = prompt_starts_copy\n", - "\n", - "with open(storyboard_fname) as fp:\n", - " OmegaConf.save(config=storyboard, f=fp.name)" + "## $3.$ Animate" ] }, - { - "cell_type": "markdown", - "source": [ - "## $4.$ Animate" - ], - "metadata": { - "id": "_RTUFeyQqCfd" - } - }, { "cell_type": "code", "execution_count": null, @@ -705,17 +600,23 @@ }, "outputs": [], "source": [ - "# @title ## 🎨 Generate init images\n", - "\n", "import copy\n", "import datetime as dt\n", - "from omegaconf import OmegaConf\n", "from pathlib import Path\n", "import random\n", "import string\n", - "from tqdm.autonotebook import tqdm\n", "\n", + "from bokeh.models.widgets.tables import (\n", + " NumberFormatter, \n", + " BooleanFormatter,\n", + " CheckboxEditor,\n", + ")\n", + "import numpy as np\n", + "from omegaconf import OmegaConf, DictConfig\n", + "import pandas as pd\n", + "import panel as pn\n", "import PIL\n", + "from tqdm.autonotebook import tqdm\n", "\n", "from vktrs.tsp import (\n", " tsp_permute_frames,\n", @@ -728,6 +629,39 @@ " remove_punctuation,\n", ")\n", "\n", + "# to do: is there a way to check if this is in the env already?\n", + "pn.extension('tabulator')\n", + "\n", + "# this processes optional edits to the transcription (above) \n", + "if ('prompt_starts' in locals()) \\\n", + "and ('df_pre' in locals()):\n", + " if isinstance(prompt_starts, DictConfig):\n", + " prompt_starts = OmegaConf.to_container(prompt_starts)\n", + " # update prompt_starts if any changes were made above\n", + " if not np.all(df_pre.values == df.values):\n", + " df_pre = copy.deepcopy(df)\n", + " for i, rec in enumerate(prompt_starts):\n", + " rec['ts'] = float(df.loc[i,'Timestamp (sec)'])\n", + " #rec['td'] = dt.timedelta(rec['ts'])\n", + " rec['td'] = rec['ts']\n", + " rec['prompt'] = df.loc[i,'Lyric']\n", + " \n", + " # ...actually, I think the above code might not do anything\n", + " 
# probably need to checkpoint prompt_starts into the storyboard on disk.\n", + " # let's do that here just to be safe. \n", + " workspace = OmegaConf.load('config.yaml')\n", + " root = Path(workspace.project_root)\n", + "\n", + " storyboard_fname = root / 'storyboard.yaml'\n", + " storyboard = OmegaConf.load(storyboard_fname)\n", + "\n", + " storyboard.prompt_starts = prompt_starts\n", + " with open(storyboard_fname) as fp:\n", + " OmegaConf.save(config=storyboard, f=fp.name)\n", + "\n", + "\n", + "# @title ## 🎨 Generate init images\n", + "\n", "\n", "workspace = OmegaConf.load('config.yaml')\n", "root = Path(workspace.project_root)\n", @@ -816,7 +750,6 @@ " im_name = f\"{im_name.stem}-{time.time()}{im_name.suffix}\"\n", " new_path = archive_root / im_name\n", " old_fp.rename(new_path)\n", - " print(\"archival process complete\")\n", "\n", "\n", "d_ = dict(\n", @@ -829,13 +762,14 @@ "\n", ")\n", "\n", + "regenerate_all_init_images = False # @param {type:'boolean'}\n", "\n", "# @markdown `theme_prompt` - Text that will be appended to the end of each lyric, useful for e.g. applying a consistent aesthetic style\n", "\n", "# @markdown `display_frames_as_we_get_them` - Displaying frames will make the notebook slightly slower\n", "\n", "# regenerate all images if the theme prompt has changed or user specifies\n", - "regenerate_all_init_images = False # @param {type:'boolean'}\n", + "\n", "if d_['theme_prompt'] != storyboard.params.get('theme_prompt'):\n", " regenerate_all_init_images = True\n", "\n", @@ -845,6 +779,7 @@ " for i, rec in enumerate(prompt_starts):\n", " rec['frame0_fpath'] = None\n", " archive_images(i)\n", + " print(\"archival process complete\")\n", "\n", "# anchor images will be regenerated if there's no associated frame0_fpath\n", "# regenerate specific images if\n", @@ -862,8 +797,7 @@ " if regen:\n", " rec['frame0_fpath'] = None\n", " archive_images(i)\n", - "\n", - " \n", + " print(\"archival process complete\")\n", "\n", "\n", "theme_prompt = storyboard.params.theme_prompt\n", @@ -871,15 +805,12 @@ "height = storyboard.params.height\n", "width = storyboard.params.width\n", "\n", - "\n", - "# to do: move this up to run params\n", - "#proj_name = 'test'\n", "proj_name = workspace.active_project\n", "\n", "print(\"Ensuring each prompt has an associated image\")\n", "for idx, rec in enumerate(prompt_starts):\n", " print(\n", - " f\"[{rec['anim_start']} | {rec['ts']}] [{rec['duration']} | {rec['anim_duration']}] - {rec['frames']} - {rec['prompt']}\"\n", + " f\"[{idx} | {rec['ts']}] - {rec['prompt']}\"\n", " )\n", " lyric = rec['prompt']\n", " prompt = f\"{lyric}, {theme_prompt}\"\n", @@ -926,11 +857,6 @@ "\n", "# flag regens in the table\n", "\n", - "from bokeh.models.widgets.tables import (\n", - " NumberFormatter, \n", - " BooleanFormatter,\n", - " CheckboxEditor,\n", - ")\n", "\n", "df_regen = pd.DataFrame(prompt_starts)[['ts','prompt']].rename(\n", " columns={\n", @@ -957,54 +883,158 @@ }, "outputs": [], "source": [ + "\n", "# @title ## 🚀 Generate animation frames\n", "\n", - "from omegaconf import OmegaConf\n", - "from PIL import Image\n", + "\n", + "######################\n", + "# improved resume\n", "\n", "import copy\n", "import datetime as dt\n", "from itertools import cycle\n", "from pathlib import Path\n", "\n", - "# reload config\n", + "from omegaconf import OmegaConf\n", + "from PIL import Image\n", + "from vktrs.utils import add_caption2image\n", + "\n", "workspace = OmegaConf.load('config.yaml')\n", "root = Path(workspace.project_root)\n", "\n", "storyboard_fname 
= root / 'storyboard.yaml'\n", "storyboard = OmegaConf.load(storyboard_fname)\n", "\n", - "prompt_starts = OmegaConf.to_container(storyboard.prompt_starts, resolve=True)\n", + "if not 'prompt_starts' in locals():\n", + " prompt_starts = OmegaConf.to_container(storyboard.prompt_starts)\n", "\n", "\n", - "# `nsfw_regens` - Max number of times to attempt regenerating an image after triggering the NSFW classifier (huggingface only, see [Open RAIL-M restrictions](https://huggingface.co/spaces/CompVis/stable-diffusion-license))\n", + "# to do: move to utils\n", + "def get_image_sequence(idx, root=root, init_first=True):\n", + " images = (root / 'frames' ).glob(f'{idx}-*.png')\n", + " images = [str(fp) for fp in images]\n", + " if init_first:\n", + " init_image = None\n", + " images2 = []\n", + " for i, fp in enumerate(images):\n", + " if 'anchor' in fp:\n", + " init_image = fp\n", + " else:\n", + " images2.append(fp)\n", + " if not init_image:\n", + " try:\n", + " init_image, images2 = images2[0], images2[1:]\n", + " images = [init_image] + images2\n", + " except IndexError:\n", + " images = images2\n", + " return images\n", "\n", - "# @markdown `n_variations` - How many unique variations to generate for a given text prompt. This determines the frequency of the visual \"pulsing\" effect\n", "\n", - "# @markdown `image_consistency` - controls similarity between images generated by the prompt.\n", - "# @markdown - 0: ignore the init image\n", - "# @markdown - 1: true as possible to the init image\n", + "########################\n", + "# update config\n", "\n", - "# @markdown `add_caption` - Whether or not to overlay the prompt text on the image\n", + "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", "\n", - "# @markdown `optimal_ordering` - Intelligently permutes animation frames to provide a smoother animation.\n", + "# to do: this should be rendered unnecessary before this branch is merged\n", + "for rec in prompt_starts_copy:\n", + " for k,v in list(rec.items()):\n", + " if isinstance(v, dt.timedelta):\n", + " rec[k] = v.total_seconds()\n", + "\n", + "storyboard.prompt_starts = prompt_starts_copy\n", + "\n", + "with open(storyboard_fname) as fp:\n", + " OmegaConf.save(config=storyboard, f=fp.name)\n", + "\n", + "\n", + "############################################\n", + "\n", + "# 🧮 Math\n", + "### This block computes how many frames are needed for each segment\n", + "### based on the start times for each prompt\n", + "\n", + "\n", + "fps = 12 # @param {type:'integer'}\n", + "storyboard.params.fps = fps\n", + "\n", + "ifps = dt.timedelta(seconds=1/fps)\n", + "\n", + "# estimate video end\n", + "video_duration = storyboard.params['video_duration']\n", + "\n", + "# dummy prompt for last scene duration\n", + "#prompt_starts = OmegaConf.to_container(prompt_starts, resolve=True)\n", + "prompt_starts = OmegaConf.to_container(storyboard.prompt_starts) # I don't think I need to resolve here..\n", + "for rec in prompt_starts:\n", + " rec['td'] = dt.timedelta(seconds=rec['td'])\n", + "prompt_starts.append({'td':dt.timedelta(seconds=video_duration)})\n", + "\n", + "# make sure we respect the duration of the previous phrase\n", + "frame_start=dt.timedelta(seconds=0)\n", + "prompt_starts[0]['anim_start']=frame_start\n", + "for i, rec in enumerate(prompt_starts[1:], start=1):\n", + " rec_prev = prompt_starts[i-1]\n", + " k=0\n", + " while (rec_prev['anim_start'] + k*ifps) < rec['td']:\n", + " k+=1\n", + " k-=1\n", + " rec_prev['frames'] = k\n", + " rec_prev['anim_duration'] = k*ifps\n", + " 
frame_start+=k*ifps\n", + " rec['anim_start']=frame_start\n", + "\n", + "# make sure we respect the duration of the previous phrase\n", + "# to do: push end time into a timedelta and consider it... somewhere near here\n", + "for i, rec1 in enumerate(prompt_starts):\n", + " rec0 = prompt_starts[i-1]\n", + " rec0['duration'] = rec1['td'] - rec0['td']\n", + "\n", + "# drop the dummy frame\n", + "prompt_starts = prompt_starts[:-1]\n", + "\n", + "# to do: given a 0 duration prompt, assume its duration is captured in the next prompt \n", + "# and guesstimate a corrected prompt start time and duration \n", + "\n", + "\n", + "### checkpoint the processing work we've done to this point\n", + "\n", + "prompt_starts_copy = copy.deepcopy(prompt_starts)\n", + "\n", + "for rec in prompt_starts_copy:\n", + " for k,v in list(rec.items()):\n", + " if isinstance(v, dt.timedelta):\n", + " rec[k] = v.total_seconds()\n", + "\n", + "storyboard.prompt_starts = prompt_starts_copy\n", + "\n", + "with open(storyboard_fname) as fp:\n", + " OmegaConf.save(config=storyboard, f=fp.name)\n", "\n", - "# @markdown `max_video_duration_in_seconds` - Early stopping if you don't want to generate a video the full duration of the provided audio. Default = 5min.\n", + "\n", + "\n", + "## 🚀 Generate animation frames\n", "\n", "\n", "d_ = dict(\n", " _=''\n", - "\n", " , n_variations=5 # @param {type:'integer'}\n", " , image_consistency=0.8 # @param {type:\"slider\", min:0, max:1, step:0.01} \n", - " , add_caption = False # @param {type:'boolean'}\n", - " , optimal_ordering = True # @param {type:'boolean'}\n", " , max_video_duration_in_seconds = 300 # @param {type:'integer'}\n", - "\n", - " # this parameter is currently not exposed in the form\n", - " , max_variations_per_opt_pass = 15\n", ")\n", "\n", + "\n", + "# @markdown `fps` - Frames-per-second of generated animations\n", + "\n", + "# @markdown `n_variations` - How many unique variations to generate for a given text prompt. This determines the frequency of the visual \"pulsing\" effect\n", + "\n", + "# @markdown `image_consistency` - controls similarity between images generated by the prompt.\n", + "# @markdown - 0: ignore the init image\n", + "# @markdown - 1: true as possible to the init image\n", + "\n", + "# @markdown `max_video_duration_in_seconds` - Early stopping if you don't want to generate a video the full duration of the provided audio. 
Default = 5min.\n", + "\n", + "\n", + "\n", "storyboard.params.update(d_)\n", "storyboard.params.max_frames = storyboard.params.fps * storyboard.params.max_video_duration_in_seconds\n", "\n", @@ -1014,7 +1044,7 @@ "display_frames_as_we_get_them = storyboard.params.display_frames_as_we_get_them\n", "image_consistency = storyboard.params.image_consistency\n", "max_frames = storyboard.params.max_frames\n", - "max_variations_per_opt_pass = storyboard.params.max_variations_per_opt_pass\n", + "\n", "n_variations = storyboard.params.n_variations\n", "theme_prompt = storyboard.params.get('theme_prompt')\n", "\n", @@ -1046,10 +1076,6 @@ " if display_frames_as_we_get_them:\n", " display(img)\n", "\n", - " #frames.extend(sequence)\n", - " #if len(frames) >= max_frames:\n", - " # break\n", - "\n", "########################\n", "# update config\n", "\n", @@ -1084,6 +1110,17 @@ "from omegaconf import OmegaConf\n", "from tqdm.autonotebook import tqdm\n", "\n", + "from vktrs.tsp import (\n", + " tsp_permute_frames,\n", + " batched_tsp_permute_frames,\n", + ")\n", + "\n", + "from vktrs.utils import (\n", + " add_caption2image,\n", + " save_frame,\n", + " remove_punctuation,\n", + ")\n", + "\n", "# reload config\n", "workspace = OmegaConf.load('config.yaml')\n", "root = Path(workspace.project_root)\n", @@ -1093,24 +1130,43 @@ "\n", "#####################################\n", "# variation parameters\n", + "# do we save this to storyboard for posterity?\n", + "\n", + "output_filename = 'output.mp4' # @param {type:'string'}\n", + "add_caption = False # @param {type:'boolean'}\n", + "optimal_ordering = True # @param {type:'boolean'}\n", + "upscale = False # @param {type:'boolean'}\n", + "\n", + "\n", + "# @markdown `add_caption` - Whether or not to overlay the prompt text on the image\n", + "\n", + "# @markdown `optimal_ordering` - Intelligently permutes animation frames to provide a smoother animation.\n", + "\n", + "# @markdown `upscale`: Naively (lanczos interpolation) upscale video 2x. This can be a way to force\n", + "# @markdown services like youtube to deliver your video without mangling it with compression\n", + "# @markdown artifacts. 
Thanks [@gandamu_ml](https://twitter.com/gandamu_ml) for this trick!\n", + "\n", + "\n", + "# this parameter is currently not exposed in the form\n", + "max_variations_per_opt_pass = 15\n", + "\n", "\n", - "add_caption = storyboard.params.get('add_caption')\n", - "optimal_ordering = storyboard.params.optimal_ordering\n", "if optimal_ordering:\n", - " opt_batch_size = storyboard.params.n_variations\n", - " while opt_batch_size > storyboard.params.max_variations_per_opt_pass:\n", - " opt_batch_size /= 2\n", - " print(f\"Frames per re-ordering batch: {opt_batch_size}\")\n", - " storyboard.params.opt_batch_size = opt_batch_size\n", + " opt_batch_size = min(storyboard.params.n_variations, max_variations_per_opt_pass)\n", "\n", "#####################################\n", "# video parameters\n", "\n", - "output_filename = 'output.mp4' # @param {type:'string'}\n", - "output_filename = str( root / output_filename )\n", - "storyboard.params.output_filename = output_filename\n", "\n", - "# to do: move fps computations here\n", + "#output_filename = str( root / output_filename )\n", + "#storyboard.params.output_filename = output_filename\n", + "# I think it might be more efficient to write the video to the local disk first, then move it\n", + "# afterwards, rather than writing into google drive\n", + "final_output_filename = str( root / output_filename )\n", + "storyboard.params.output_filename = final_output_filename\n", + "\n", + "\n", + "# to do: move/duplicate fps computations here (?)\n", "fps = storyboard.params.fps\n", "input_audio = storyboard.params.audio_fpath\n", "\n", @@ -1127,7 +1183,8 @@ " if optimal_ordering:\n", " images = batched_tsp_permute_frames(\n", " images,\n", - " max_variations_per_opt_pass\n", + " #max_variations_per_opt_pass\n", + " opt_batch_size\n", " )\n", " return images\n", "\n", @@ -1139,16 +1196,13 @@ "if input_audio:\n", " cmd_in += ['-i', str(input_audio)]\n", "\n", - "upscale = False # @param {type:'boolean'}\n", + "# NB: it might be more efficient to perform this upscaling step as a \n", + "# separate step after compiling the video frames\n", "if upscale:\n", " height=storyboard.params.height\n", " width=storyboard.params.width\n", " cmd_out = ['-vf', f'scale={2*width}x{2*height}:flags=lanczos'] + cmd_out\n", "\n", - "# @markdown `upscale`: Naively (lanczos interpolation) upscale video 2x. This can be a way to force\n", - "# @markdown services like youtube to deliver your video without mangling it with compression\n", - "# @markdown artifacts. Thanks [@gandamu_ml](https://twitter.com/gandamu_ml) for this trick!\n", - "\n", "\n", "cmd = cmd_in + cmd_out\n", "\n", @@ -1164,8 +1218,12 @@ "\n", "print(\"Encoding video...\")\n", "p.wait()\n", - "print(\"Video complete.\")\n", - "print(f\"Video saved to: {storyboard.params.output_filename}\")" + "\n", + "if output_filename != final_output_filename:\n", + " print(f\"Local video compilation complete. Moving video to: {final_output_filename}\")\n", + " import shutil\n", + " shutil.move(output_filename, final_output_filename)\n", + "print(\"Video complete.\")" ] }, { @@ -1181,13 +1239,14 @@ "\n", "output_filename = storyboard.params.output_filename\n", "\n", - "download_video = False # @param {type:'boolean'}\n", + "download_video = True # @param {type:'boolean'}\n", "compress_video = False # @param {type:'boolean'}\n", "\n", "# @markdown Compressing to `*.tar.gz`` format can reduce filesize, which in turn reduces\n", "# @markdown your download time. 
You may need to install additional software\n", "# @markdown to \"decompress\" the file after downloading to view your video.\n", "\n", + "# @markdown NB: Your video will probably download way faster from https://drive.google.com\n", "\n", "# NB: only embed short videos\n", "embed_video_in_notebook = False\n", @@ -1291,4 +1350,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +}
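
Note on the fallback `gpu_info()` introduced in the "Check GPU Status" cell of this patch: as written it builds the `--query-gpu` string with a trailing comma and unpacks exactly one CSV record, so it quietly assumes a single-GPU machine. The following is only a sketch of a slightly more defensive variant, not code from the patch; it relies solely on the standard `nvidia-smi --query-gpu=... --format=csv` interface that the cell already uses.

import subprocess

import pandas as pd


def gpu_info():
    # join the query fields explicitly so no trailing comma sneaks into the flag
    fields = ['timestamp', 'name', 'utilization.gpu', 'utilization.memory',
              'memory.used', 'memory.free']
    outv = subprocess.run(
        ['nvidia-smi', '--query-gpu=' + ','.join(fields), '--format=csv'],
        stdout=subprocess.PIPE,
    ).stdout.decode('utf-8')
    # first non-empty line is the header; every following line is one GPU
    header, *records = [line for line in outv.splitlines() if line.strip()]
    cols = [' '.join(k.strip().split('.')).capitalize() for k in header.split(',')]
    return pd.DataFrame(
        [{c: v.strip() for c, v in zip(cols, rec.split(','))} for rec in records]
    ).T


gpu_info()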
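The "🧮 Math" block that this patch folds into the "Generate animation frames" cell derives, for each prompt, how many frames it should occupy from the prompt start times, the audio duration, and the chosen `fps`. Below is a self-contained sketch of that same computation for reference; `allocate_frames` is a hypothetical helper name (it is not part of `vktrs`), and it assumes each record carries its start time in seconds under the `td` key, as the checkpointed storyboard does.

import datetime as dt


def allocate_frames(prompt_starts, video_duration, fps=12):
    """Annotate each record (start time 'td' in seconds) with its frame count."""
    ifps = dt.timedelta(seconds=1 / fps)
    recs = [dict(rec, td=dt.timedelta(seconds=rec['td'])) for rec in prompt_starts]
    # dummy record so the final prompt runs to the end of the audio
    recs.append({'td': dt.timedelta(seconds=video_duration)})

    frame_start = dt.timedelta(seconds=0)
    recs[0]['anim_start'] = frame_start
    for i in range(1, len(recs)):
        prev, cur = recs[i - 1], recs[i]
        # largest k such that prev['anim_start'] + k frames still precedes cur's start
        k = 0
        while prev['anim_start'] + k * ifps < cur['td']:
            k += 1
        k -= 1
        prev['frames'] = k
        prev['anim_duration'] = k * ifps
        frame_start += k * ifps
        cur['anim_start'] = frame_start

    return recs[:-1]  # drop the dummy record


# e.g. two prompts starting at 0.0 s and 2.4 s over 5 s of audio at 12 fps
# -> the first prompt gets 28 frames, the second 31
print(allocate_frames([{'td': 0.0}, {'td': 2.4}], video_duration=5.0))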