updated prompts and temperatures

alexfazio · Aug 6, 2024 · 3e59e6c
1 parent e33773f
commit 3e59e6c
Show file tree

Hide file tree

Showing 8 changed files with 64 additions and 55 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/__pycache__/extracts.cpython-312.pyc b/__pycache__/extracts.cpython-312.pyc
diff --git a/app.py b/app.py
@@ -113,4 +113,5 @@ def main():
     main()
 
 # TODO: Change the options to: 1. Download YouTube video and transcribe locally 2. Download YouTube video and use remote transcript 3. Use existing video file to transcribe locally
-# TODO: Add an API key validator before proceeding with the execution to avoid discovering that the API key is invalid during later stages of the process.
+# TODO: Add an API key validator before proceeding with the execution to avoid discovering that the API key is invalid during later stages of the process.
+# TODO: Request the aspect ratio input before initiating the local transcription process.
diff --git a/crew.py b/crew.py
@@ -71,7 +71,7 @@ def main(extracts):
         max_rpm=1,
         llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-exp-0801",
                                    verbose=True,
-                                   temperature=0.5,
+                                   temperature=0.0,
                                    google_api_key=gemini_api_key)
     )
 
@@ -94,7 +94,7 @@ def main(extracts):
         max_rpm=1,
         llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-exp-0801",
                                    verbose=True,
-                                   temperature=0.5,
+                                   temperature=0.0,
                                    google_api_key=gemini_api_key)
     )
 
@@ -117,35 +117,33 @@ def main(extracts):
         max_rpm=1,
         llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-exp-0801",
                                    verbose=True,
-                                   temperature=0.5,
+                                   temperature=0.0,
                                    google_api_key=gemini_api_key)
     )
 
     return_subtitles_1 = Task(
         description=dedent((
             f"""
-            You will be provided with a list of transcription extracts from a video clip, and the full content of an .srt subtitle file corresponding to that clip. Your task is to match each transcription extract to the subtitle segment it best aligns with, and return the results in a specific format.
+            You will be provided with a transcription extract from a video clip and the full content of an .srt subtitle file corresponding to that clip. Your task is to match the transcription extract to the subtitle segment it best aligns with and return the results in a specific format.
         
-            Here are the transcription extracts:
+            Here is the transcription extract:
             <segments>
             {extracts[0]}
             </segments>
         
-            And here is the full content of the .srt subtitle file:
+            Here is the full content of the .srt subtitle file:
             <srt_file>
             {subtitles}
             </srt_file>
         
             Please follow these steps:
-            1. Carefully read through each transcription extract within the <segments> tags.
-            2. For each extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
-            3. Once you've found the best matching subtitle segment for an extract, format the match like this:
+            1. Carefully read through the transcription excerpt within the <segments> tags.
+            2. Given the extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
+            3. Once you've found the best matching subtitle segment for the excerpt, format the match as follows:
             [segment number]
             [start time] --> [end time] 
             [matched transcription extract]
-        
-            4. Repeat steps 1-3 for each transcription extract, keeping the extracts in the same order they appeared in the <segments> list.
-            5. After processing all the extracts, combine the formatted matches into a single block of text. This should look like a valid .srt subtitle file, with each match separated by a blank line.
+            4. After processing the extract, combine the formatted matches into a single block of text. This should resemble a valid .srt subtitle file, with each match separated by a blank line.
         
             Please note: .srt files have a specific format that must be followed exactly in order for them to be readable. Therefore, it is crucial that you do not include any extra content beyond the raw subtitle data itself. This means:
             - No comments explaining your work
@@ -194,28 +192,26 @@ def main(extracts):
     return_subtitles_2 = Task(
         description=dedent((
             f"""
-            You will be provided with a list of transcription extracts from a video clip, and the full content of an .srt subtitle file corresponding to that clip. Your task is to match each transcription extract to the subtitle segment it best aligns with, and return the results in a specific format.
+            You will be provided with a transcription extract from a video clip and the full content of an .srt subtitle file corresponding to that clip. Your task is to match the transcription extract to the subtitle segment it best aligns with and return the results in a specific format.
 
-            Here are the transcription extracts:
+            Here is the transcription extract:
             <segments>
             {extracts[1]}
             </segments>
 
-            And here is the full content of the .srt subtitle file:
+            Here is the full content of the .srt subtitle file:
             <srt_file>
             {subtitles}
             </srt_file>
 
             Please follow these steps:
-            1. Carefully read through each transcription extract within the <segments> tags.
-            2. For each extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
-            3. Once you've found the best matching subtitle segment for an extract, format the match like this:
+            1. Carefully read through the transcription excerpt within the <segments> tags.
+            2. Given the extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
+            3. Once you've found the best matching subtitle segment for the excerpt, format the match as follows:
             [segment number]
             [start time] --> [end time] 
             [matched transcription extract]
-
-            4. Repeat steps 1-3 for each transcription extract, keeping the extracts in the same order they appeared in the <segments> list.
-            5. After processing all the extracts, combine the formatted matches into a single block of text. This should look like a valid .srt subtitle file, with each match separated by a blank line.
+            4. After processing the extract, combine the formatted matches into a single block of text. This should resemble a valid .srt subtitle file, with each match separated by a blank line.
 
             Please note: .srt files have a specific format that must be followed exactly in order for them to be readable. Therefore, it is crucial that you do not include any extra content beyond the raw subtitle data itself. This means:
             - No comments explaining your work
@@ -265,28 +261,26 @@ def main(extracts):
     return_subtitles_3 = Task(
         description=dedent((
             f"""
-            You will be provided with a list of transcription extracts from a video clip, and the full content of an .srt subtitle file corresponding to that clip. Your task is to match each transcription extract to the subtitle segment it best aligns with, and return the results in a specific format.
+            You will be provided with a transcription extract from a video clip and the full content of an .srt subtitle file corresponding to that clip. Your task is to match the transcription extract to the subtitle segment it best aligns with and return the results in a specific format.
 
-            Here are the transcription extracts:
+            Here is the transcription extract:
             <segments>
             {extracts[2]}
             </segments>
 
-            And here is the full content of the .srt subtitle file:
+            Here is the full content of the .srt subtitle file:
             <srt_file>
             {subtitles}
             </srt_file>
 
             Please follow these steps:
-            1. Carefully read through each transcription extract within the <segments> tags.
-            2. For each extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
-            3. Once you've found the best matching subtitle segment for an extract, format the match like this:
+            1. Carefully read through the transcription excerpt within the <segments> tags.
+            2. Given the extract, search through the <srt_file> content to find the subtitle segment that best matches the extract. To determine the best match, look for segments that contain the most overlapping words or phrases with the extract.
+            3. Once you've found the best matching subtitle segment for the excerpt, format the match as follows:
             [segment number]
             [start time] --> [end time] 
             [matched transcription extract]
-
-            4. Repeat steps 1-3 for each transcription extract, keeping the extracts in the same order they appeared in the <segments> list.
-            5. After processing all the extracts, combine the formatted matches into a single block of text. This should look like a valid .srt subtitle file, with each match separated by a blank line.
+            4. After processing the extract, combine the formatted matches into a single block of text. This should resemble a valid .srt subtitle file, with each match separated by a blank line.
 
             Please note: .srt files have a specific format that must be followed exactly in order for them to be readable. Therefore, it is crucial that you do not include any extra content beyond the raw subtitle data itself. This means:
             - No comments explaining your work

diff --git a/extracts.py b/extracts.py
@@ -47,39 +47,45 @@ def call_openai_api(transcript):
     logging.info("STARTING call_openai_api")
 
     prompt = dedent(f"""
-        Here is the full transcript from the video:
-        
+        You will be given a complete transcript from a video. Your task is to identify three different 1-minute long clips from this video (approximately 8 spoken sentences for each clip) that have the highest potential to become popular on social media.
+
+        Here is the full transcript:
+
         <transcript>
         {transcript}
         </transcript>
         
-        Your task is to identify the three 1-minute long clips from this video that have the highest potential to go viral on social media.  
+        Carefully read through the entire transcript above, looking for the most powerful, emotionally impactful, surprising, thought-provoking or otherwise memorable moments. Your goal is to select three 1-minute long segments centered around these powerful moments that you think have the best chance of getting widely shared and going viral.
+        
+        Follow these steps:
+
+        1. Read the entire transcript carefully, identifying key moments that stand out as particularly impactful or shareable.
+        
+        2. For each of these moments, extract a 1-minute segment of text from the transcript, centered around that moment. Ensure each segment is approximately 1 minute long when spoken (about 8 sentences).
         
-        Carefully read through the entire transcript above, looking for the most powerful, emotionally impactful, surprising, thought-provoking or otherwise memorable moments. Select three 1-minute long segments centered around those powerful moments that you think have the best chance of getting widely shared and going viral.
+        3. From these segments, choose the top three that you believe have the highest potential to go viral.
         
-        For each clip you select, extract the full text of the selected 1-minute segment from the transcript. 
+        4. Rank these three clips from most to least viral potential based on your assessment.
         
-        Order the three clips from most to least viral potential based on your assessment.
+        Format your final output as a JSON object containing an ordered list of the selected clips, each with its extracted text. The JSON object should look like this:
         
-        Format the final output as a JSON object containing an ordered list of the selected clips, each with its extracted text. The JSON object should look like this:
-    
         {{
         "clips": [
             {{
             "rank": 1,
-            "text": "<extracted text of clip 1>"
+            "text": "<extracted text of key moment 1>"
             }},
             {{
             "rank": 2, 
-            "text": "<extracted text of clip 2>"
+            "text": "<extracted text of key moment 2>"
             }},
             {{
             "rank": 3,
-            "text": "<extracted text of clip 3>"
+            "text": "<extracted text of key moment 3>"
             }}
         ]
         }}
-    
+        
         Return nothing else but the raw content of the JSON object itself - no comments, no extra text. Just the JSON.
     """)
 
@@ -91,7 +97,7 @@ def call_openai_api(transcript):
                 {"role": "user", "content": prompt}
             ],
             temperature=0.8,
-            max_tokens=4095,
+            max_tokens=4096,
             top_p=1,
             frequency_penalty=0,
             presence_penalty=0
@@ -136,4 +142,10 @@ def main():
     return response
 
 if __name__ == "__main__":
-    main()
+    main()
+
+# TODO: Split the tasks below into separate large language model (LLM) API calls or agents to implement a divide-and-conquer approach.
+# 1. Read the entire transcript carefully, identifying key moments that stand out as particularly impactful or shareable.
+# 2. For each of these moments, extract a 1-minute segment of text from the transcript, centered around that moment. Ensure each segment is approximately 1 minute long when spoken (about 8 sentences).
+# 3. From these segments, choose the top three that you believe have the highest potential to go viral.
+# 4. Rank these three clips from most to least viral potential based on your assessment.
diff --git a/poetry.lock b/poetry.lock
diff --git a/subtitler_output/.DS_Store b/subtitler_output/.DS_Store