Initial release

QubitPi · Oct 5, 2024 · ac3c148
1 parent 78ce46b
commit ac3c148
Showing 1 changed file with 114 additions and 130 deletions.
diff --git a/app.py b/app.py
@@ -1,72 +1,52 @@
 import whisper
-from pytubefix import YouTube
-from pytubefix.cli import on_progress
-import requests
-import time
 import streamlit as st
 from streamlit_lottie import st_lottie
-import numpy as np
-import os
-from typing import Iterator
-from io import StringIO
 from utils import write_vtt, write_srt
 import ffmpeg
-from languages import LANGUAGES
-import torch
-from zipfile import ZipFile
-import base64
+import requests
+from typing import Iterator
+from io import StringIO
+import numpy as np
 import pathlib
-import re
+import os
 
 st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
 
-torch.cuda.is_available()
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Model options: tiny, base, small, medium, large
-loaded_model = whisper.load_model("small", device=DEVICE)
-current_size = "None"
 
 # Define a function that we can use to load lottie files from a link.
+@st.cache(allow_output_mutation=True)
 def load_lottieurl(url: str):
     r = requests.get(url)
     if r.status_code != 200:
         return None
     return r.json()
 
+
 APP_DIR = pathlib.Path(__file__).parent.absolute()
 
-LOCAL_DIR = APP_DIR / "local_youtube"
+LOCAL_DIR = APP_DIR / "local"
 LOCAL_DIR.mkdir(exist_ok=True)
 save_dir = LOCAL_DIR / "output"
 save_dir.mkdir(exist_ok=True)
 
-
+loaded_model = whisper.load_model("base")
+current_size = "None"
 
 col1, col2 = st.columns([1, 3])
 with col1:
-    lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
+    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_HjK9Ol.json")
     st_lottie(lottie)
 
 with col2:
     st.write("""
     ## Auto Subtitled Video Generator 
-    ##### Input a YouTube video link and get a video with subtitles.
+    ##### Upload a video file and get a video with subtitles.
     ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
     ###### ➠ If you want to translate the subtitles to English, select the task as "Translate" 
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
 
 
-def download_video(link):
-    yt = YouTube(link, on_progress_callback=on_progress)
-    ys = yt.streams.get_highest_resolution()
-    video = ys.download(filename=f"{save_dir}/youtube_video.mp4")
-    return video
-
-
-def convert(seconds):
-    return time.strftime("%H:%M:%S", time.gmtime(seconds))
-
-
+@st.cache(allow_output_mutation=True)
 def change_model(current_size, size):
     if current_size != size:
         loaded_model = whisper.load_model(size)
@@ -75,20 +55,23 @@ def change_model(current_size, size):
         raise Exception("Model size is the same as the current size.")
 
 
-def inference(link, loaded_model, task):
-    yt = YouTube(link, on_progress_callback=on_progress)
-    ys = yt.streams.get_audio_only()
-    path = ys.download(filename=f"{save_dir}/audio.mp3", mp3=True)
+@st.cache(allow_output_mutation=True)
+def inferecence(loaded_model, uploaded_file, task):
+    with open(f"{save_dir}/input.mp4", "wb") as f:
+        f.write(uploaded_file.read())
+    audio = ffmpeg.input(f"{save_dir}/input.mp4")
+    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
+    ffmpeg.run(audio, overwrite_output=True)
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
-        results = loaded_model.transcribe(path, **options)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
         vtt = getSubs(results["segments"], "vtt", 80)
         srt = getSubs(results["segments"], "srt", 80)
         lang = results["language"]
         return results["text"], vtt, srt, lang
     elif task == "Translate":
         options = dict(task="translate", best_of=5)
-        results = loaded_model.transcribe(path, **options)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
         vtt = getSubs(results["segments"], "vtt", 80)
         srt = getSubs(results["segments"], "srt", 80)
         lang = results["language"]
@@ -111,145 +94,146 @@ def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     return segmentStream.read()
 
 
-def get_language_code(language):
-    if language in LANGUAGES.keys():
-        detected_language = LANGUAGES[language]
-        return detected_language
-    else:
-        raise ValueError("Language not supported")
-
-
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
-    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("youtube_sub.mp4").run(quiet=True, overwrite_output=True)
-    video_with_subs = open("youtube_sub.mp4", "rb")
-    return video_with_subs        
-
+    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True,
+                                                                                                            overwrite_output=True)
+    video_with_subs = open("final.mp4", "rb")
+    return video_with_subs
+
 
 def main():
-    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large-v3"], index=1)
+    size = st.selectbox(
+        "Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)",
+        ["tiny", "base", "small", "medium", "large"], index=1)
     loaded_model = change_model(current_size, size)
     st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
-        f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
-    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)", placeholder="Input YouTube link and press enter")
+             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    input_file = st.file_uploader("File", type=["mp4", "avi", "mov", "mkv"])
+    # get the name of the input_file
+    if input_file is not None:
+        filename = input_file.name[:-4]
+    else:
+        filename = None
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
-            with st.spinner("Transcribing the video..."):
-                results = inference(link, loaded_model, task)
-            video = download_video(link)
-            lang = results[3]
-            detected_language = get_language_code(lang)
-
+            results = inferecence(loaded_model, input_file, task)
             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
-                st.video(video)
-
-            # Split result["text"]  on !,? and . , but save the punctuation
-            sentences = re.split("([!?.])", results[0])
-            # Join the punctuation back to the sentences
-            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-            text = "\n\n".join(sentences)
+                st.video(input_file)
+
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(text)
+                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
-                
-            with open("transcript.vtt", "w+",encoding='utf8') as f:
+
+            with open("transcript.vtt", "w+", encoding='utf8') as f:
                 f.writelines(results[1])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                 datavtt = f.read()
-                
-            with open("transcript.srt", "w+",encoding='utf8') as f:
+
+            with open("transcript.srt", "w+", encoding='utf8') as f:
                 f.writelines(results[2])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
-
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success(
+                    "You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info(
+                    "Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav",
+                                                               "transcript.srt")
                 st.video(video_with_subs)
-                st.balloons()
-
-            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-            zipObj.write("transcript.txt")
-            zipObj.write("transcript.vtt")
-            zipObj.write("transcript.srt")
-            zipObj.write("youtube_sub.mp4")
-            zipObj.close()
-            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-            with open(ZipfileDotZip, "rb") as f:
-                datazip = f.read()
-                b64 = base64.b64encode(datazip).decode()
-                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-        Download Transcripts and Video\
-    </a>"
-            st.markdown(href, unsafe_allow_html=True)
-
+                st.snow()
+            with col8:
+                st.download_button(label="Download Video with Subtitles",
+                                   data=video_with_subs,
+                                   file_name=f"{filename}_with_subs.mp4")
     elif task == "Translate":
         if st.button("Translate to English"):
-            with st.spinner("Translating to English..."):
-                results = inference(link, loaded_model, task)
-            video = download_video(link)
-            lang = results[3]
-            detected_language = get_language_code(lang)
-
+            results = inferecence(loaded_model, input_file, task)
             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
-                st.video(video)
-
-            # Split result["text"]  on !,? and . , but save the punctuation
-            sentences = re.split("([!?.])", results[0])
-            # Join the punctuation back to the sentences
-            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-            text = "\n\n".join(sentences)
+                st.video(input_file)
+
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(text)
+                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
-                
-            with open("transcript.vtt", "w+",encoding='utf8') as f:
+
+            with open("transcript.vtt", "w+", encoding='utf8') as f:
                 f.writelines(results[1])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                 datavtt = f.read()
-                
-            with open("transcript.srt", "w+",encoding='utf8') as f:
+
+            with open("transcript.srt", "w+", encoding='utf8') as f:
                 f.writelines(results[2])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
-
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success(
+                    "You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info(
+                    "Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav",
+                                                               "transcript.srt")
                 st.video(video_with_subs)
-                st.balloons()
-
-            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-            zipObj.write("transcript.txt")
-            zipObj.write("transcript.vtt")
-            zipObj.write("transcript.srt")
-            zipObj.write("youtube_sub.mp4")
-            zipObj.close()
-            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-            with open(ZipfileDotZip, "rb") as f:
-                datazip = f.read()
-                b64 = base64.b64encode(datazip).decode()
-                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-        Download Transcripts and Video\
-    </a>"
-            st.markdown(href, unsafe_allow_html=True)
-
+                st.snow()
+            with col8:
+                st.download_button(label="Download Video with Subtitles ",
+                                   data=video_with_subs,
+                                   file_name=f"{filename}_with_subs.mp4")
     else:
-        st.info("Please select a task.")
+        st.error("Please select a task.")
 
 
 if __name__ == "__main__":
     main()
-
+    st.markdown(
+        "###### Made with :heart: by [@BatuhanYılmaz](https://github.com/BatuhanYilmaz26) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")