-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordlebot.py
259 lines (207 loc) · 7.45 KB
/
wordlebot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# python3
# This module contains the helper functions for the wordle bot
from math import log2
from itertools import product
from re import match
from csv import DictReader
def generateGuess(
    words: list,
    freq_list: dict,
    arrangements: list,
    uncertainity: float,
    current_score: int,
) -> str:
    """
    Pick the next guess from the remaining candidate words.

    Every word with a non-zero entry in ``freq_list`` is scored as
    ``p * current_score + (1 - p) * (uncertainity - current_score)``, where
    ``p`` is that entry. The highest-scoring word is returned; on a tie the
    word that appears first in ``words`` wins.

    Args:
        words: Remaining candidate words.
        freq_list: Mapping of word -> probability. Words that are missing or
            mapped to 0 are never proposed.
        arrangements: Iterable of feedback patterns (sequences of the
            characters "R", "Y" and "G").
        uncertainity: Uncertainty of the remaining candidates, as computed by
            ``uncertainity``.
        current_score: Number of the guess that is about to be made.

    Returns:
        The chosen word, or None when no word in ``words`` has a usable
        frequency.
    """
    guesses = []
    for candidate in words:
        candidate_freq = freq_list.get(candidate)
        # Words without frequency data are considered too unlikely to guess.
        if not candidate_freq:
            continue

        # Expected information (in bits) of guessing `candidate`, accumulated
        # over every feedback pattern. The loop below uses its own names on
        # purpose: re-binding `candidate` here would make the score and the
        # recorded guess refer to an unrelated word.
        expected_information = 0.0
        for arrangement in arrangements:
            matching = getMatchingWords(candidate, words, arrangement)
            probability = sum(freq_list.get(other) or 0 for other in matching)
            if probability:
                expected_information += probability * log2(1 / probability)
        # NOTE(review): expected_information is not consumed by the score
        # below; the formula is kept exactly as the module defined it.
        # Confirm whether it was meant to be subtracted from `uncertainity`.

        score = candidate_freq * current_score + (1 - candidate_freq) * (
            uncertainity - current_score
        )
        guesses.append((candidate, score))

    if not guesses:
        return None
    # max() returns the first maximal item, matching a stable descending sort.
    return max(guesses, key=lambda guess: guess[1])[0]
def uncertainity(words: list, freq_list: dict) -> float:
    """
    Return the total uncertainty (in bits) of a list of candidate words.

    Each word contributes ``p * log2(1 / p)``, where ``p`` is its entry in
    ``freq_list``. Words that are missing from the mapping, or mapped to 0,
    contribute nothing.
    """
    total = 0.0
    for probability in map(freq_list.get, words):
        # Missing (None) and zero probabilities carry no information.
        if probability:
            total += probability * log2(1 / probability)
    return total
def getMatchingWords(last_guess: str, words: list, feedback_str: str) -> list:
    """
    Return the words that remain possible after a guess and its feedback.

    Args:
        last_guess: The word that was guessed.
        words: Candidate words to filter.
        feedback_str: Feedback for ``last_guess``, one of "R", "Y" or "G" per
            letter.

    Returns:
        A new list holding, in their original order, the words for which
        ``isValid(last_guess, feedback_str, word)`` is true.
    """
    return [word for word in words if isValid(last_guess, feedback_str, word)]
def genRegex(last_guess: str, feedback_str: str) -> str:
    """
    Generate a positional regex from the last guess and its feedback.

    Feedback characters, as used throughout this module:
        "R": the letter is in the right position.
        "Y": the letter is in the word but at a different position.
        "G": the letter is not in the word (or, when the same letter is also
             marked "Y", not at this position).
    Any other feedback character contributes no token to the pattern.

    Args:
        last_guess: The word that was guessed.
        feedback_str: One feedback character per letter of ``last_guess``.

    Returns:
        A regex with one token per recognised feedback character. Character
        classes list their letters in sorted order, so the same input always
        yields the same pattern text.
    """
    pairs = list(zip(last_guess, feedback_str))
    grey = {letter for letter, mark in pairs if mark == "G"}
    yellow = {letter for letter, mark in pairs if mark == "Y"}
    # A letter that is grey in one spot and yellow in another still occurs in
    # the word, so it cannot be banned from every position.
    absent = "".join(sorted(grey - yellow))

    tokens = []
    for letter, mark in pairs:
        if mark == "R":
            tokens.append(letter)
        elif mark == "Y":
            # Present in the word, but not here.
            tokens.append(f"[^{letter}{absent}]")
        elif mark == "G":
            # A grey tile always rules out its own letter at this position,
            # even when that letter is yellow elsewhere in the guess.
            own = "" if letter in absent else letter
            tokens.append(f"[^{own}{absent}]")
    return "".join(tokens)
def isValid(last_guess: str, feedback_str: str, word: str):
    """
    Tell whether ``word`` is consistent with a guess and its feedback.

    The word must match the positional regex built by ``genRegex`` and must
    contain every letter of the guess that was marked "Y".
    """
    pattern = genRegex(last_guess, feedback_str)
    # Positional constraints first: a failed regex match rules the word out.
    if not match(pattern, word):
        return False
    # Every yellow letter has to occur somewhere in the word.
    required = (l for l, f in zip(last_guess, feedback_str) if f == "Y")
    return all(letter in word for letter in required)
# Function to read the word list
def readFile(filename: str) -> list:
    """
    Return the lines of ``filename`` with surrounding whitespace stripped.

    Blank lines are kept as empty strings.
    """
    with open(filename) as handle:
        return [line.strip() for line in handle]
def readFileDict(filename: str) -> dict:
    """
    Load a CSV with ``word`` and ``probability`` columns into a dict.

    Returns a mapping of each word to its probability as a float. When a word
    appears more than once, the last row wins.
    """
    table = {}
    with open(filename, newline="") as handle:
        for record in DictReader(handle):
            likelihood = float(record["probability"])
            table[record["word"]] = likelihood
    return table
def wordleBot(
    words: list,  # The list of possible words
    freq_list: dict,  # The frequency data: word -> probability
    arrangements: list,  # All possible feedback strings
    current_score: int = 1,  # Current score of the game
    last_guess: str = None,  # The previous guess made and passed down to the function
    feedback_str: str = None,  # The Feedback given for the last guess
    simulate: bool = False,  # Simulation Mode is ON or off
    simulate_data: dict = None,  # If simulation mode is ON, then this dictionary should
    # carry the input function with the parameters, word and guess
    # and the correct word
) -> list:
    """
    Play one round of the game and recurse until it is solved or stuck.

    Each call picks a guess, obtains feedback for it (from ``simulate_data``
    in simulation mode, otherwise from the terminal), narrows ``words`` down
    and recurses with ``current_score + 1``.

    Returns:
        None when ``feedback_str`` is already "RRRRR" or no guess can be
        generated; the guess itself (a str) when it is the last one made;
        otherwise a list with this guess followed by all later guesses.
    """
    # The previous guess was correct: nothing left to do.
    if feedback_str == "RRRRR":
        return None

    # Calculate the best guess
    current_uncertainity = uncertainity(words, freq_list)
    last_guess = generateGuess(
        words, freq_list, arrangements, current_uncertainity, current_score
    )
    # No candidate left to propose: stop before asking anyone for feedback on
    # a guess that does not exist.
    if not last_guess:
        return None

    # Get feedback on the guess
    if simulate:
        input_func = simulate_data["input"]
        feedback_str = input_func(word=simulate_data["word"], guess=last_guess)
    else:
        print("Next Guess: ", last_guess)
        # Feedback is matched against upper-case "R"/"Y"/"G", so normalise
        # what the user typed.
        feedback_str = input("Feedback String: ").strip().upper()

    # Keep only the words that agree with the feedback
    words = getMatchingWords(last_guess, words, feedback_str)

    # Recursive call for the next round
    next_guess = wordleBot(
        words,
        freq_list,
        arrangements,
        current_score + 1,
        last_guess,
        feedback_str,
        simulate,
        simulate_data,
    )

    # Chain this guess in front of whatever the later rounds produced
    if isinstance(next_guess, list):
        return [last_guess, *next_guess]
    if next_guess:
        return [last_guess, next_guess]
    return last_guess
def main() -> list:
    """
    Load the data files and run the bot interactively.

    Reads the word list from ``./data/wordle.txt`` and the frequency table
    from ``./data/5-gram_freq.csv``, runs ``wordleBot`` and prints its result.

    Returns:
        The value returned by ``wordleBot`` (the guesses made), so callers
        can use it as well as see it printed.
    """
    words = readFile("./data/wordle.txt")
    freq_list = readFileDict("./data/5-gram_freq.csv")
    # Every feedback pattern over "R", "Y", "G" for a five-letter word.
    # NOTE(review): product() returns a one-shot iterator of tuples, so it is
    # exhausted after the first full pass over it. Confirm that this is
    # intended before materialising it into a list.
    arrangements = product("RYG", repeat=5)
    guesses = wordleBot(words, freq_list, arrangements)
    print(guesses)
    return guesses
def welcomeMsg():
    """
    Print the welcome banner and the legend for the coloured feedback tiles.
    """
    banner = f"\033[92m{' Wordle BOT 🤖'.center(90,'=')}\033[0m"
    legend = (
        ">>\t\033[42mA\033[0m --> A is in the word and is in the correct position\t<<",
        ">>\t\033[43mA\033[0m --> A is in the word but is in an incorrect position\t<<",
        ">>\t\033[100mA\033[0m --> A is not in the word\t\t\t\t<<",
    )
    prompt = f"\033[1;92m{' Guess the word '.center(90,'=')}\033[0m"
    # Emit banner, legend and prompt in order, one line each.
    for line in (banner, *legend, prompt):
        print(line)
# Script entry point: print the banner and legend, then run the bot.
if __name__ == "__main__":
    welcomeMsg()
    main()