-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchess_LLM_Capa.py
116 lines (82 loc) · 3.33 KB
/
chess_LLM_Capa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
# coding: utf-8
# # Chess Knowledge Extractor
#
# This project is designed to extract and analyze chess-related knowledge from various sources. It aims to answer questions like "What is the best way to learn chess?" based on advice from the legendary José Raúl Capablanca and from popular chess resources like 'Say Chess' Substack.
import dotenv
import os
import re
from bs4 import BeautifulSoup
import requests
import json
from collections import defaultdict
import openai
from langchain.llms import OpenAI
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import TextLoader
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load the variables it defines
load_dotenv(dotenv_path=find_dotenv())
# Give the openai module the API key read from the environment (None if unset)
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Function to download text file from a URL
def download_text_file(url, timeout=30):
    """
    Download a text resource over HTTP and return its body as a string.
    Args:
        url (str): Address of the text file to fetch.
        timeout (float): Seconds to wait for the server before giving up,
            so a stalled connection cannot hang the script indefinitely.
    Returns:
        str: The response body as decoded by ``requests``.
    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than returning an error page as if it were the document.
    response.raise_for_status()
    return response.text
# Function to read text file
def read_text_file(file_path, encoding=None):
    """
    Return the entire contents of a text file as one string.
    Args:
        file_path (str): Path of the file to read.
        encoding (str | None): Text encoding used to decode the file.
            ``None`` (the default) keeps the platform's default encoding,
            which is what this function used before the parameter existed.
    Returns:
        str: Everything the file contains.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()
# Function to preprocess text
import re
def preprocess_text(text):
    """
    Collapse every run of whitespace characters into a single space.
    Runs at the start or end of the text are reduced to one space as well;
    they are not stripped.
    Args:
        text (str): The raw text to normalise.
    Returns:
        str: The text with each whitespace run replaced by one space.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', text)
# # Part 1: Chess Fundamentals, by Capablanca
# URL of the Gutenberg project book
book_url = 'https://www.gutenberg.org/files/33870/33870-8.txt'  # EBook of Chess Fundamentals, by Capablanca

# Download the book text from the web
text_from_web = download_text_file(book_url)

# Optionally keep a local copy. UTF-8 is requested explicitly so the write
# cannot fail if the text contains characters that the platform's default
# encoding is unable to represent.
with open("downloaded_book.txt", "w", encoding="utf-8") as f:
    f.write(text_from_web)

# Collapse whitespace runs before handing the text to the chain
preprocessed_text = preprocess_text(text_from_web)

# Question-answering chain (chain_type="map_reduce") applied to the whole book
llm = OpenAI(temperature=0, openai_api_key=openai.api_key)
qa_chain = load_qa_chain(llm, chain_type="map_reduce")
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)

# Outside a notebook a bare expression's value is silently dropped, so the
# answer is kept in a variable and printed.
capablanca_answer = qa_document_chain.run(
    input_document=preprocessed_text,
    question="what is the best way to learn chess",
)
print(capablanca_answer)
# # Part 2: Say Chess Substack.
# Read an article from the web and answer a question about chess
## 10 simple pieces of chess advice
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load the article and split it into chunks (chunk_size=500, no overlap)
loader = WebBaseLoader("https://saychess.substack.com/p/10-simple-pieces-of-chess-advice")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(loader.load())

# Embed and store splits
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(openai_api_key=openai.api_key),
)
retriever = vectorstore.as_retriever()

# Prompt pulled from the LangChain hub
rag_prompt = hub.pull("rlm/rag-prompt")

# LLM -- the API key is passed explicitly, as for the other OpenAI clients in this file
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.1,
    openai_api_key=openai.api_key,
)

# RAG chain: the retriever fills "context", the raw input is passed through as "question"
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
)

# Outside a notebook a bare expression's value is silently dropped, so the
# chat model's reply is kept in a variable and its text is printed.
rag_answer = rag_chain.invoke("10-simple-pieces-of-chess-advice")
print(rag_answer.content)