-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added youtube transcript and code base analyzer
- Loading branch information
Showing
16 changed files
with
744 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,6 @@ old | |
*photo* | ||
*.png | ||
|
||
*.mp4 | ||
|
||
|
68 changes: 68 additions & 0 deletions
68
...5 Combining Components Together with Chains/1 Chains and Why They Are Used/1_llm_chain.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from langchain_core.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain_openai import OpenAI | ||
|
||
|
||
prompt_template = "What is a word to replace the following: {word}?" | ||
|
||
# Set the "OPENAI_API_KEY" environment variable before running following line. | ||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
llm_chain = LLMChain( | ||
llm=llm, | ||
prompt=PromptTemplate.from_template(prompt_template) | ||
) | ||
|
||
result = llm_chain("artificial") | ||
print(result) | ||
|
||
# It is also possible to use the .apply() method to pass multiple inputs | ||
# at once and receive a list for each input. | ||
# The sole difference lies in the exclusion of inputs within the returned list. | ||
# Nonetheless, the returned list will maintain the identical order as the input. | ||
|
||
input_list = [ | ||
{"word": "artificial"}, | ||
{"word": "intelligence"}, | ||
{"word": "robot"} | ||
] | ||
|
||
result = llm_chain.apply(input_list) | ||
print(result) | ||
|
||
# The .generate() method will return an instance of LLMResult, | ||
# which provides more information. | |
# For example, the finish_reason key indicates the reason | ||
# behind the stop of the generation process. | ||
# It could be stopped, meaning the model decided | ||
# to finish or reach the length limit. | ||
# There is other self-explanatory information | ||
# like the number of total used tokens or the used model. | ||
|
||
result = llm_chain.generate(input_list) | ||
print(result) | ||
|
||
# The next method we will discuss is .predict() (which can be used interchangeably with .run()). | |
# Its best use case is to pass multiple inputs for a single prompt. | ||
# However, it is possible to use it with one input variable as well. | ||
# The following prompt will pass both the word we want a substitute | ||
# for and the context the model must consider. | ||
|
||
prompt_template = "Looking at the context of '{context}'. What is an appropriate word to replace the following: {word}?" | ||
|
||
llm_chain = LLMChain( | ||
llm=llm, | ||
prompt=PromptTemplate(template=prompt_template, input_variables=["word", "context"])) | ||
|
||
result = llm_chain.predict(word="fan", context="object") | ||
# or llm_chain.run(word="fan", context="object") | ||
print(result) | ||
|
||
# The model correctly suggested that a Ventilator would be a suitable replacement | ||
# for the word fan in the context of objects. Furthermore, | ||
# when we repeat the experiment with a different context, humans, | ||
# the output will change to Admirer. | |
|
||
result = llm_chain.predict(word="fan", context="humans") | ||
# or llm_chain.run(word="fan", context="humans") | |
print(result) |
20 changes: 20 additions & 0 deletions
20
...n/5 Combining Components Together with Chains/1 Chains and Why They Are Used/2_parsers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
from langchain_core.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain.output_parsers import CommaSeparatedListOutputParser | ||
from langchain_openai import OpenAI | ||
|
||
|
||
# Set the "OPENAI_API_KEY" environment variable before running following line. | ||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
output_parser = CommaSeparatedListOutputParser() | ||
template = """List all possible words as substitute for 'artificial' as comma separated.""" | ||
|
||
llm_chain = LLMChain( | ||
llm=llm, | ||
prompt=PromptTemplate(template=template, output_parser=output_parser, input_variables=[]), | ||
output_parser=output_parser) | ||
|
||
result = llm_chain.predict() | ||
print(result) |
32 changes: 32 additions & 0 deletions
32
...ents Together with Chains/1 Chains and Why They Are Used/3_conversational_chain_memory.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from langchain_core.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain.output_parsers import CommaSeparatedListOutputParser | ||
from langchain_openai import OpenAI | ||
|
||
|
||
# Set the "OPENAI_API_KEY" environment variable before running following line. | ||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
# Depending on the application, memory is the next component | ||
# that will complete a chain. LangChain provides a ConversationChain | |
# to track previous prompts and responses using the ConversationBufferMemory class. | |
|
||
from langchain.chains import ConversationChain | ||
from langchain.memory import ConversationBufferMemory | ||
|
||
output_parser = CommaSeparatedListOutputParser() | ||
conversation = ConversationChain( | ||
llm=llm, | ||
memory=ConversationBufferMemory() | ||
) | ||
|
||
result = conversation.predict(input="List all possible words as substitute for 'artificial' as comma separated.") | ||
print(result) | ||
|
||
# Now, we can ask it to return the following four replacement words. | ||
# It uses the memory to find the next options. | ||
|
||
result=conversation.predict(input="And the next 4?") | ||
print(result) | ||
|
||
|
56 changes: 56 additions & 0 deletions
56
...ning Components Together with Chains/1 Chains and Why They Are Used/4_sequential_chain.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
|
||
from langchain_core.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain.output_parsers import CommaSeparatedListOutputParser | ||
from langchain_openai import OpenAI | ||
|
||
|
||
# Set the "OPENAI_API_KEY" environment variable before running following line. | ||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
|
||
|
||
# Another helpful feature is using a sequential chain that concatenates | ||
# multiple chains into one. The following code shows a sample usage. | ||
|
||
|
||
# poet | ||
poet_template: str = """You are an American poet, your job is to come up with\ | ||
poems based on a given theme. | ||
Here is the theme you have been asked to generate a poem on: | ||
{input}\ | ||
""" | ||
|
||
poet_prompt_template: PromptTemplate = PromptTemplate( | ||
input_variables=["input"], template=poet_template) | ||
|
||
# creating the poet chain | ||
poet_chain: LLMChain = LLMChain( | ||
llm=llm, output_key="poem", prompt=poet_prompt_template) | ||
|
||
# critic | ||
critic_template: str = """You are a critic of poems, you are tasked\ | ||
to inspect the themes of poems. Identify whether the poem includes romantic expressions or descriptions of nature. | ||
Your response should be in the following format, as a Python Dictionary. | ||
poem: this should be the poem you received | ||
Romantic_expressions: True or False | ||
Nature_descriptions: True or False | ||
Here is the poem submitted to you: | ||
{poem}\ | ||
""" | ||
|
||
critic_prompt_template: PromptTemplate = PromptTemplate( | ||
input_variables=["poem"], template=critic_template) | ||
|
||
# creating the critic chain | ||
#critic_chain: LLMChain = LLMChain( | ||
# llm=llm, output_key="critic_verified", prompt=critic_prompt_template) | ||
|
||
# Compose the critic prompt and the LLM into a single runnable pipeline:
# the formatted prompt is fed straight into the model.
critic_chain = critic_prompt_template | llm

# A composed runnable is not callable the way a legacy LLMChain is, so it
# must be executed through .invoke(), with the prompt's input variable
# ("poem") supplied as a dict.
result = critic_chain.invoke({"poem": "The sun is shining bright"})

print(critic_chain)
print(result)
29 changes: 29 additions & 0 deletions
29
...ion/5 Combining Components Together with Chains/1 Chains and Why They Are Used/5_debug.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
from langchain_core.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain_openai import OpenAI | ||
from langchain.chains import ConversationChain | ||
from langchain.memory import ConversationBufferMemory | ||
from langchain.output_parsers import CommaSeparatedListOutputParser | ||
|
||
|
||
output_parser = CommaSeparatedListOutputParser() | ||
|
||
# Set the "OPENAI_API_KEY" environment variable before running following line. | ||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
|
||
template = """List all possible words as substitute for 'artificial' as comma separated. | ||
Current conversation: | ||
{history} | ||
{input}""" | ||
|
||
conversation = ConversationChain( | ||
llm=llm, | ||
prompt=PromptTemplate(template=template, input_variables=["history", "input"], output_parser=output_parser), | ||
memory=ConversationBufferMemory(), | ||
verbose=True) | ||
|
||
result = conversation.predict(input="") |
18 changes: 18 additions & 0 deletions
18
...e a YouTube Video Summarizer Using Whisper and LangChain /1-download_mp4_from_youtuber.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import yt_dlp | ||
|
||
def download_mp4_from_youtube(url, filename='lecuninterview.mp4'):
    """Download a YouTube video to a local MP4 file using yt-dlp.

    Args:
        url: Address of the video to download.
        filename: Output path passed to yt-dlp as its ``outtmpl`` option.
            Defaults to the file name the script originally hard-coded, so
            existing callers that pass only ``url`` behave as before.
    """
    # Set the options for the download
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': filename,
        'quiet': True,
    }

    # Download the video file; the metadata returned by extract_info is
    # not needed here, so it is not kept.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.extract_info(url, download=True)
|
||
url = "https://www.youtube.com/watch?v=mBjPyte2ZZo" | ||
|
||
download_mp4_from_youtube(url) |
8 changes: 8 additions & 0 deletions
8
.../2 Create a YouTube Video Summarizer Using Whisper and LangChain /2-whisper_transcribe.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import whisper | ||
|
||
model = whisper.load_model("base") | ||
result = model.transcribe("lecuninterview.mp4") | ||
print(result['text']) | ||
|
||
with open ('text.txt', 'w') as file: | ||
file.write(result['text']) |
103 changes: 103 additions & 0 deletions
103
...hains/2 Create a YouTube Video Summarizer Using Whisper and LangChain /3-summarization.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#from langchain import OpenAI, LLMChain | ||
from langchain.chains.mapreduce import MapReduceChain | ||
#from langchain.prompts import PromptTemplate | ||
from langchain.chains.summarize import load_summarize_chain | ||
|
||
from langchain_openai import OpenAI | ||
from langchain_core.prompts import PromptTemplate | ||
|
||
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) | ||
|
||
|
||
|
||
# creates an instance of the RecursiveCharacterTextSplitter | ||
# class, which is responsible for splitting input text into smaller chunks. | ||
|
||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
text_splitter = RecursiveCharacterTextSplitter( | ||
chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"] | ||
) | ||
|
||
# It is configured with a chunk_size of 1000 characters, | ||
# no chunk_overlap, and uses spaces, commas, and newline characters as separators. | ||
# This ensures that the input text is broken down into manageable pieces, | ||
# allowing for efficient processing by the language model. | ||
|
||
from langchain.docstore.document import Document | ||
|
||
with open('text.txt') as f: | ||
text = f.read() | ||
|
||
texts = text_splitter.split_text(text) | ||
docs = [Document(page_content=t) for t in texts[:4]] | ||
|
||
# Each Document object is initialized with the content of a chunk from the texts list. | ||
# The [:4] slice notation indicates that only the first four chunks will be used | ||
# to create the Document objects. | ||
|
||
from langchain.chains.summarize import load_summarize_chain | ||
import textwrap | ||
|
||
chain = load_summarize_chain(llm, chain_type="map_reduce") | ||
|
||
output_summary = chain.run(docs) | ||
wrapped_text = textwrap.fill(output_summary, width=100) | ||
from termcolor import colored | ||
print ("----- SUMMARY -----") | ||
print(colored(wrapped_text, 'yellow')) | ||
|
||
# With the following line of code, we can see the prompt template | ||
# that is used with the map_reduce technique. | ||
# Now we’re changing the prompt and using another summarization method | ||
|
||
print ("------ PROMPT TEMPLATE ------") | ||
|
||
|
||
print(colored(chain.llm_chain.prompt.template, 'yellow')) | ||
|
||
# The "stuff" approach is the simplest and most naive one, | ||
# in which all the text from the transcribed video is used in a single prompt. | ||
# This method may raise exceptions if all text is longer than the available | ||
# context size of the LLM and may not be the most efficient way to handle large amounts of text. | ||
# We’re going to experiment with the prompt below. | ||
# This prompt will output the summary as bullet points. | ||
|
||
prompt_template = """Write a concise bullet point summary of the following: | ||
{text} | ||
CONSCISE SUMMARY IN BULLET POINTS:""" | ||
|
||
BULLET_POINT_PROMPT = PromptTemplate(template=prompt_template, | ||
input_variables=["text"]) | ||
|
||
# Also, we initialize the summarization chain using "stuff" as the chain_type together with the prompt above. | |
|
||
chain = load_summarize_chain(llm, | ||
chain_type="stuff", | ||
prompt=BULLET_POINT_PROMPT) | ||
|
||
output_summary = chain.run(docs) | ||
|
||
wrapped_text = textwrap.fill(output_summary, | ||
width=1000, | ||
break_long_words=False, | ||
replace_whitespace=False) | ||
print ("----- CONCISE SUMMARY -----") | ||
print(colored(wrapped_text, 'yellow')) | ||
|
||
# The 'refine' summarization chain is a method for generating more accurate | ||
# and context-aware summaries. This chain type is designed to iteratively | ||
# refine the summary by providing additional context when needed. | ||
# That means: it generates the summary of the first chunk. | ||
# Then, for each successive chunk, the work-in-progress | ||
# summary is integrated with new info from the new chunk. | ||
|
||
chain = load_summarize_chain(llm, chain_type="refine") | ||
|
||
output_summary = chain.run(docs) | ||
wrapped_text = textwrap.fill(output_summary, width=100) | ||
print("----- REFINED SUMMARY -----") | ||
print(colored(wrapped_text, 'yellow')) |
Oops, something went wrong.