-
Notifications
You must be signed in to change notification settings - Fork 0
/
raye.py
249 lines (172 loc) · 6.78 KB
/
raye.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#takes a string as input and matches it against a set of example questions to come up with a response.
#compares the top potential responses with each other to find if an answer among the top responses is better than the absolute top one
#raye uses data from conversations on hydar to come up with her answers
import compare_strings2 as cs
import random
import math
import threading
import sys
import generate_text
import classifier2
import socket
# Minimum score the best match must exceed before a learned response is used.
p_match_threshold = 0.67
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    # When something is listening on localhost:5923, announce this bot to it
    # and demand a stricter match before answering.
    s.connect(('localhost', 5923))
    s.sendall(b'raye')
    p_match_threshold = 0.89
except OSError:
    # Nothing listening (or the send failed): keep the default threshold and
    # release the unused socket instead of leaking it.
    s.close()
# Lock shared by the worker threads when they store their results.
lock = threading.Lock()
# The message to answer: every command-line argument, joined by single spaces.
inp = " ".join(sys.argv[1:])
#print(inp)
#previous_messages = ["yo", "hydar", "whats the weather like", "i like trains", "who are you", "what is skypixel hyblock", "why does hypixel skyblock exist"]
#responses = ["yo", "hydar", "idk", "dude same", "huh?", "reference to the popular 2018 video game among us", "im not sure either"]
def _read_lines(path):
    """Return the newline-terminated lines of the text file at *path*.

    Each returned string excludes its trailing newline. Blank lines are kept
    as empty strings so that line numbers stay aligned between files. Text
    after the final newline (an unterminated last line) is ignored.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the file even if read() fails.
    with open(path, "r") as handle:
        text = handle.read()
    # split() leaves the unterminated tail (possibly empty) as its last item.
    return text.split("\n")[:-1]


# previous_messages[k] is an example question and responses[k] is its answer.
previous_messages = _read_lines("./bots/raye_questions.txt")
responses = _read_lines("./bots/raye_responses.txt")
def createProbability(message1, message2):
    """Return a similarity score for message2 measured against message1.

    In other words, the score is read as the probability that message2 is
    correct assuming message1 is correct.

    Args:
        message1: the reference string.
        message2: the candidate string compared to the reference.

    Returns:
        1.0 when the two strings are equal. Otherwise the result of
        cs.shortStrings alone when both strings are shorter than 8
        characters, or the mean of cs.compareStrings and cs.shortStrings
        for longer input.
    """
    # Equal strings need none of the more expensive comparisons.
    if message1 == message2:
        return 1.0

    # Short strings are scored with shortStrings only.
    # NOTE(review): the meaning of the third argument (10) is defined in
    # compare_strings2 and is not visible here.
    if max(len(message1), len(message2)) < 8:
        return cs.shortStrings(message1, message2, 10)

    # Longer strings: average the two scores.
    x = cs.compareStrings(message1, message2, 10)
    y = cs.shortStrings(message1, message2, 10)
    return (x + y) / 2
# p_match[k] is the probability that previous_messages[k] is the right match
# for the input; for now it is also the probability that its response is
# correct.
lp = len(previous_messages)  # number of example questions, reused below
p_match = [0] * lp


def populatePMatch(initial, end):
    """Score the input against previous_messages[initial:end].

    Each score is stored in p_match at the same index. Intended to be run
    from several threads, each given its own index range.

    Args:
        initial: first index to score (inclusive).
        end: index to stop at (exclusive).
    """
    for i in range(initial, end):
        # Compute the score before taking the lock: holding the lock during
        # the comparison would force the worker threads to run one at a time.
        score = createProbability(inp, previous_messages[i])
        with lock:
            p_match[i] = score
# Split the indices 0..lp into four consecutive ranges, one per thread; the
# last range also takes the remainder when lp is not a multiple of four.
quarter = int(lp / 4)
t1, t2, t3, t4 = [
    threading.Thread(target=populatePMatch, args=(start, stop))
    for start, stop in (
        (0, quarter),
        (quarter, 2 * quarter),
        (2 * quarter, 3 * quarter),
        (3 * quarter, lp),
    )
]
# Start every worker before waiting on any of them.
for worker in (t1, t2, t3, t4):
    worker.start()
for worker in (t1, t2, t3, t4):
    worker.join()
# Pair every score with the index of its question: [index, score].
en_p_match = [[index, score] for index, score in enumerate(p_match)]
# Shuffle before the (stable) sort so that equal scores end up in random order.
random.shuffle(en_p_match)
# Highest score first.
en_p_match.sort(reverse=True, key=lambda pair: pair[1])

# Collect the responses of the best-scoring questions. A candidate must score
# at least 0.67 and be within 0.1 of the top score; at most 28 are kept to
# save time. maxResp[k] is therefore the response for en_p_match[k].
maxResp = []
for index, score in en_p_match:
    # The list is sorted, so once one entry fails no later entry can pass.
    if len(maxResp) >= 28 or en_p_match[0][1] - score >= 0.1 or score < 0.67:
        break
    maxResp.append(responses[index])
# A learned response is used only when at least one candidate was collected
# and the best match score exceeds p_match_threshold.
if len(maxResp) > 0 and en_p_match[0][1] > p_match_threshold:
    # Re-rank the candidates by how well each one agrees with the others.
    # Using Bayes' theorem, P(maxResp[x] | all other candidates) should be
    # proportional to P(maxResp[x]) * product over i of
    # P(maxResp[i] | maxResp[x]). A sum of logs is used instead of the
    # product to reduce underflow.
    newProbabilities = [0] * len(maxResp)

    def _log_score(p):
        """Return log(p), or -inf for a non-positive p where math.log would raise."""
        return math.log(p) if p > 0 else float("-inf")

    def condProbs(initial, end):
        """Store the summed log-score of candidates initial..end-1 in newProbabilities."""
        for i in range(initial, end):
            # maxResp[i] was taken from en_p_match[i], so that entry holds its prior.
            newProbability = _log_score(en_p_match[i][1])
            for j in range(len(maxResp)):
                if i != j:
                    newProbability += _log_score(createProbability(maxResp[i], maxResp[j]))
            with lock:
                newProbabilities[i] = newProbability

    candidate_count = len(maxResp)
    if candidate_count >= 4:
        # Four threads over consecutive index ranges; the last range also
        # takes the remainder.
        quarter = candidate_count // 4
        bounds = [0, quarter, 2 * quarter, 3 * quarter, candidate_count]
        workers = [
            threading.Thread(target=condProbs, args=(start, stop))
            for start, stop in zip(bounds, bounds[1:])
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    else:
        # Too few candidates to be worth threading.
        condProbs(0, candidate_count)

    # Argmax of newProbabilities; ties go to the earliest candidate.
    best = max(range(candidate_count), key=newProbabilities.__getitem__)
    print(maxResp[best])
elif p_match_threshold != 0.89:
    # No good match. With the stricter 0.89 threshold nothing is printed;
    # otherwise fall back to a canned reply behind a random opener.
    starters = ["Hydar, also ", "I mean hydar but like ", "Hydar, but also ", "Hydar, and also "]
    reply = random.choice(starters)
    if classifier2.classify(inp) == "question":
        reply += "i dont know"  # placeholder answer
    else:
        # NOTE(review): the generated text is currently unused; the call is
        # kept in case generate() has side effects - confirm and remove.
        generate_text.generate("question")
        reply += "ok"  # placeholder answer
    print(reply)