# RLMethod.py
# Tabular Q-learning agent for chess: training loop plus a pygame human-vs-agent test game.
import random
import chess
import numpy as np
import math
import pygame
# Q-table shared by training and move selection; it is read and written as
# QTable[move.from_square, move.to_square], one entry per (from, to) square pair.
QTable = np.zeros((64,64))
# declaring the values of alpha, gamma and epsilon
learningRate = 0.1 #alpha: weight of the new estimate when blending it into the old Q value
discountFactor = 0.9 #gamma: multiplier applied to the best Q value in the row of the move's to_square
explorationRate = 0.1 #epsilon: probability that select_move returns a random legal move
# this method gets a list of all possible moves
def get_possible_moves(board):
    """Return every move in ``board.legal_moves`` as a freshly built list.

    The moves keep the order in which ``board.legal_moves`` yields them.
    """
    return list(board.legal_moves)
# this method selects, based on the exploration rate, whether to explore or exploit
def select_move(board, QTable, explorationRate):
    """Choose a move with an epsilon-greedy rule over the Q-table.

    With probability ``explorationRate`` a uniformly random legal move is
    returned (explore). Otherwise the legal move with the highest
    ``QTable[from_square, to_square]`` entry is returned (exploit); ties go
    to the first such move in ``board.legal_moves`` order.

    Args:
        board: object exposing ``legal_moves`` (for example ``chess.Board``).
        QTable: 2-D array indexed as ``[from_square, to_square]``.
        explorationRate: probability of taking the random branch.

    Returns:
        The selected move, or ``None`` when ``board`` has no legal moves.
    """
    legal = list(board.legal_moves)
    # Both branches now agree on the empty case; previously the explore branch
    # raised ValueError while the exploit branch returned None.
    if not legal:
        return None

    if np.random.random() < explorationRate:
        # Pick by index so the move objects are never coerced into an ndarray.
        return legal[np.random.randint(len(legal))]

    # max() keeps the first maximal element, matching a strict ">" scan.
    return max(legal, key=lambda mv: QTable[mv.from_square, mv.to_square])
# main training method of RL
# Plays one game starting from a fresh chess.Board() and updates QTable in place.
#   QTable          - array indexed as [from_square, to_square]; modified in place
#   learningRate    - blend weight between the old entry and the new estimate
#   discountFactor  - multiplier on the largest entry in the row of the move's to_square
#   explorationRate - forwarded to select_move for Black's moves
# Nothing is returned.
# NOTE(review): the indentation of this block is missing, so it is not clear
# whether the reward / Q-update statements below run on every ply inside the
# `while` loop or only once after it ends -- confirm the intended nesting.
def play_game(QTable,learningRate,discountFactor,explorationRate):
board = chess.Board()
while True:
# Black picks its move through the Q-table; White plays a random legal move.
if board.turn == chess.BLACK:
move = select_move(board,QTable,explorationRate)
else:
move = random.choice(list(board.legal_moves))
board.push(move)
# stop as soon as the move just played ended the game
if board.is_game_over():
break
# this initial value is overwritten by both branches of the if/else below
reward = 0
if board.is_checkmate():
# board.turn is the side to move in the mated position:
# Black to move -> reward -1, otherwise -> reward +1
if board.turn == chess.BLACK:
reward = -1
else:
reward = 1
else:
reward = 0
# NOTE(review): `move` is the most recent move by either side, so a random
# White move can be the one whose entry is updated -- confirm this is intended.
current_q_val = QTable[move.from_square,move.to_square]
# target = reward + gamma * best entry in the row indexed by the move's to_square
new_q_val = reward + discountFactor * np.max(QTable[move.to_square,:])
# blend the old value and the target using the learning rate
QTable[move.from_square,move.to_square] = (1-learningRate) * current_q_val + learningRate * new_q_val
# Train on 1000 games, then write the table to "QTable.npy",
# which RL_testing loads back.
for _game in range(1000):
    play_game(
        QTable=QTable,
        learningRate=learningRate,
        discountFactor=discountFactor,
        explorationRate=explorationRate,
    )
np.save("QTable.npy", QTable)
#testing the RL method created
def RL_testing():
    """Play an interactive game against the agent driven by the saved Q-table.

    Loads ``QTable.npy``, opens an 800x800 pygame window and starts a
    human-vs-agent game from the initial position. ``main`` is invoked with
    ``agent_color=False`` (Black in python-chess), so the human moves first by
    clicking a piece and then one of its highlighted target squares.
    Piece images are loaded from the ``chess-utils/`` directory.
    """
    Q = np.load("QTable.npy")

    # initialise pygame before creating the display
    pygame.init()
    x = 800
    y = 800
    screen = pygame.display.set_mode((x, y))

    # color scheme
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)
    # NOTE(review): defined but never used; highlights are drawn in BLACK below.
    HIGHLIGHT_COLOR = (0, 191, 255, 255)  # Light blue or cyan
    ALTERNATE_COLOR_1 = (118, 150, 86)
    ALTERNATE_COLOR_2 = (238, 238, 210)

    # initialize chessboard
    board = chess.Board()

    # piece symbol (as produced by str(piece)) -> image surface
    pieces = {
        'P': pygame.image.load('chess-utils/w_pawn.png'),
        'N': pygame.image.load('chess-utils/w_horse.png'),
        'B': pygame.image.load('chess-utils/w_bishop.png'),
        'R': pygame.image.load('chess-utils/w_rook.png'),
        'Q': pygame.image.load('chess-utils/w_queen.png'),
        'K': pygame.image.load('chess-utils/w_king.png'),
        'p': pygame.image.load('chess-utils/b_pawn.png'),
        'n': pygame.image.load('chess-utils/b_horse.png'),
        'b': pygame.image.load('chess-utils/b_bishop.png'),
        'r': pygame.image.load('chess-utils/b_rook.png'),
        'q': pygame.image.load('chess-utils/b_queen.png'),
        'k': pygame.image.load('chess-utils/b_king.png'),
    }

    def _draw_squares():
        """Repaint the 8x8 checkerboard, erasing pieces and highlights."""
        for i in range(8):
            for j in range(8):
                if (i + j) % 2 == 0:
                    color = ALTERNATE_COLOR_1
                else:
                    color = ALTERNATE_COLOR_2
                pygame.draw.rect(screen, color, pygame.Rect(i * 100, j * 100, 100, 100))

    def UpdateBoard(screen, board):
        """Blit every piece of ``board`` onto ``screen``, draw the grid and flip."""
        for i in range(64):
            piece = board.piece_at(i)
            if piece is not None:
                # square 0 is drawn at the bottom-left, so the row is flipped vertically
                screen.blit(pieces[str(piece)], ((i % 8) * 100, 700 - (i // 8) * 100))
        for i in range(1, 8):
            pygame.draw.line(screen, WHITE, (0, i * 100), (800, i * 100))
            pygame.draw.line(screen, WHITE, (i * 100, 0), (i * 100, 800))
        pygame.display.flip()

    def random_agent(BOARD):
        """Return the agent's move for ``BOARD``.

        Despite the name, the exploration rate passed is 0, so select_move
        never takes its random branch and always uses the loaded Q-table.
        """
        return select_move(BOARD, Q, 0)

    def main(board, agent_color):
        '''
        for bot vs human game

        The side equal to ``agent_color`` is played by ``random_agent``; the
        other side is played by mouse clicks. Runs until the window is closed
        or ``board.outcome()`` reports a result, then shuts pygame down.
        '''
        # paint the empty checkerboard
        _draw_squares()
        # name window
        pygame.display.set_caption('Chess')

        # to_squares of the currently selected piece, and the matching moves
        index_moves = []
        moves = []

        status = True  # white moves first
        while status:
            # update screen
            UpdateBoard(screen, board)

            if board.turn == agent_color:
                # bot work
                board.push(random_agent(board))
                _draw_squares()
            else:
                # human work
                for event in pygame.event.get():
                    # closing the window ends the game loop
                    if event.type == pygame.QUIT:
                        status = False

                    if event.type == pygame.MOUSEBUTTONDOWN:
                        # remove previous highlights
                        _draw_squares()
                        # find which square was clicked and its index
                        pos = pygame.mouse.get_pos()
                        square = (pos[0] // 100, pos[1] // 100)
                        index = (7 - square[1]) * 8 + square[0]

                        if index in index_moves:
                            # second click: play the move to the chosen target
                            board.push(moves[index_moves.index(index)])
                            index_moves = []
                        else:
                            # first click: show the moves of the clicked piece
                            piece = board.piece_at(index)
                            if piece is not None:
                                moves = [m for m in board.legal_moves if m.from_square == index]
                                for m in moves:
                                    t = m.to_square
                                    TX1 = 100 * (t % 8)  # left edge of the target square
                                    TY1 = 100 * (7 - t // 8)  # top edge of the target square
                                    # highlight with a 50-pixel-thick BLACK outline
                                    pygame.draw.rect(screen, BLACK, pygame.Rect(TX1, TY1, 100, 100), 50)
                                index_moves = [a.to_square for a in moves]

            # stop once the game has a result
            outcome = board.outcome()
            if outcome is not None:
                print(outcome)
                status = False
                print(board)

        # deactivates the pygame library
        pygame.quit()

    main(board, False)
# Runs at module level, after the training loop above: start the interactive test game.
RL_testing()