# SearchLearningEngine.py (forked from lichess-bot-devs/lichess-bot)
import copy
import random
import sys
import tempfile  # needed for the throwaway log directory when STORE_LOG is False
import time
from collections import deque, Counter
from datetime import datetime
import chess
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functional
import matplotlib.pyplot as plt
from HangingEngine import HangingEngine
from strategies import MinimalEngine, RandomMove
from custom_engine import ScoreEngine
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
from itertools import islice
from LearningEngine import *
from math import ceil
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)
NOTE = "CNN128_MLP2048_batch50_Reg_MarginLoss_Buffer1_L2_MaxLoss"
EVAL_INTERVAL = 120 # seconds between evaluation against baselines
TARGET_UPDATE = 0 # epochs between target-model refreshes (0 means the live model is used as the target)
BATCH_SIZE = 50 # number of board positions per gradient batch
MAX_MOVES = 200 # maximum number of moves in simulated games for training data
BUFFER_MAX_SIZE = 1000000 # max number of positions in buffer
EPOCHS = 100 # number of epochs before forcibly adding a new game to the buffer
NUM_GAMES = 1 # number of games to add when adding new positions
MAX_BUFFER_ROUNDS = 1 # max number of games in buffer
LR = 1e-4 # learning rate
ZERO_REGULARIZER = 0.1 # weight for zero score regularizer from initial board
WARM_START = False # whether to load model from disk
ANCHOR_REGULARIZER = 100 # weight for labeled position regularizer
STORE_LOG = True # whether to store tensorboard log
STARTING_POSITION = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1" # standard
# STARTING_POSITION = "7r/8/4k3/8/8/4K3/8/R7 w - - 0 1" # King and rook vs king and rook
# STARTING_POSITION = "4k3/4q3/8/8/8/8/3Q4/3K4 w - - 0 1" # King and queen vs king and queen
# STARTING_POSITION = "4k3/4q3/8/8/8/8/8/3K4 w - - 0 1" # King and queen vs king
# STARTING_POSITION = "5rr1/1k6/8/8/7K/8/4R3/2R5 w - - 0 1" # 4 rooks ladder mate in 2
# STARTING_POSITION = "5rr1/k7/8/8/7K/8/1R6/2R5 w - - 2 2" # 4 rooks ladder mate in 1
# STARTING_POSITION = "Q6k/3ppppp/8/3q4/8/8/1P6/KR6 b - - 0 1" # only legal move is mate
# STARTING_POSITION = "Q6k/4pppp/8/3q4/8/8/1P6/KR6 b - - 0 1" # 1 legal move is mate, other is loss
# STARTING_POSITION = "8/3k4/4n3/8/8/8/2N5/1K6 w - - 0 1" # King and knight each, no mate possible
# STARTING_POSITION = "1n2k1n1/8/8/8/8/8/8/1N2K1N1 w - - 0 1" # King and 2 knights each. No mates possible
# STARTING_POSITION = "rn1qkbnr/ppp1pppp/3p4/8/4P1b1/P1N5/1PPP1PPP/R1BQKBNR b KQkq - 0 3" # Hanging queen


def batchify(data, batch_size: int):
    """Shuffle data and split it into batches of batch_size items.

    Any remainder is folded into the last full batch (or returned as a single
    short batch when there are fewer than batch_size items in total).
    """
    shuffled_data = list(data)
    random.shuffle(shuffled_data)
    iterator = iter(shuffled_data)
    batches = []
    full_batches = len(data) // batch_size
    remainder = len(data) - full_batches * batch_size
    for _ in range(full_batches):
        batches.append(list(islice(iterator, batch_size)))
    if remainder:
        if batches:
            batches[-1].extend(iterator)
        else:
            batches.append(list(iterator))
    return batches
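
# Illustrative example (not from the original source): batchify(range(7), 3)
# yields two batches with sizes 3 and 4, because the single leftover item is
# appended to the last full batch rather than forming a short batch of its own.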


def get_features(board):
    """
    Returns a (25, 8, 8) numpy array of planes describing the board position,
    always oriented from the side to move's perspective.
    """
    # mirror ranks if it is black's turn so the side to move always plays "up" the board
    if board.turn == chess.BLACK:
        def rank(s):
            return 7 - chess.square_rank(s)
    else:
        def rank(s):
            return chess.square_rank(s)
    all_pieces = board.piece_map().items()
    piece_grid = np.zeros((13, 8, 8))
    attacker_grid = np.zeros((12, 8, 8))
    for square, piece in all_pieces:
        type_index = PIECE_INDEX[piece.piece_type]
        if piece.color != board.turn:
            type_index += len(PIECE_INDEX)
        position_rank = rank(square)
        position_file = chess.square_file(square)
        piece_grid[type_index, position_rank, position_file] = 1
        for attacked_square in board.attacks(square):
            attacked_rank = rank(attacked_square)
            attacked_file = chess.square_file(attacked_square)
            attacker_grid[type_index, attacked_rank, attacked_file] += 1
    # add castling rights
    if board.has_kingside_castling_rights(board.turn):
        piece_grid[12, 0, 7] = 1
    if board.has_queenside_castling_rights(board.turn):
        piece_grid[12, 0, 0] = 1
    if board.has_kingside_castling_rights(not board.turn):
        piece_grid[12, 7, 7] = 1
    if board.has_queenside_castling_rights(not board.turn):
        piece_grid[12, 7, 0] = 1  # was [12, 7, 7], which duplicated the opponent's kingside flag
    # add en passant square
    ep = board.ep_square
    if ep:
        piece_grid[12, rank(ep), chess.square_file(ep)] = 1
    features = np.concatenate((piece_grid, attacker_grid), 0)
    return features
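
# The 25 planes are: 6 piece-type planes for the side to move, 6 for the
# opponent, one plane encoding castling rights and the en passant square, and
# 12 planes of attack counts indexed the same way as the piece planes.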


class ChessResnet(nn.Module):
    def __init__(self, depth=3, width=128):
        super().__init__()
        d = 64 * (12 + 13)
        self.in_layers = nn.ModuleList([nn.Linear(d, width) for _ in range(depth)])
        self.mid_layers = nn.ModuleList([nn.Linear(width, width) for _ in range(depth)])
        self.final = nn.Linear(width, 1)

    def forward(self, x):
        if x.ndim == 3:
            x = x.view(1, -1, 8, 8)
        x = torch.flatten(x, 1)
        h = 0
        for in_layer, mid_layer in zip(self.in_layers, self.mid_layers):
            h_in = functional.leaky_relu(in_layer(x))
            h = functional.leaky_relu(mid_layer(h + h_in))
        return self.final(h)

    def zero_output(self):
        def init(x):
            torch.nn.init.uniform_(x, -1e-5, 1e-5)
        init(self.final.weight)
        init(self.final.bias)


class ChessConvNet(nn.Module):
    def __init__(self, zero=False):
        super().__init__()
        num_filters = 128
        filter_size = 1
        padding = filter_size // 2
        num_hidden = 2048
        self.conv1 = nn.Conv2d(13 + 12, num_filters, filter_size, 1, padding)
        self.conv2 = nn.Conv2d(num_filters, num_filters, filter_size, 1, padding)
        # self.conv3 = nn.Conv2d(d, d, filter_size, 1, padding)
        self.fc1 = nn.Linear(64 * num_filters, num_hidden)
        self.data_linear = nn.Linear(64 * (13 + 12), num_hidden)
        self.data_middle = nn.Linear(num_hidden, num_hidden // 2)
        self.data_final = nn.Linear(num_hidden // 2, 1)

    def forward(self, x0):
        if x0.ndim == 3:
            x0 = x0.view(1, -1, 8, 8)
        output = 0
        x = functional.leaky_relu(self.conv1(x0))
        x = functional.leaky_relu(self.conv2(x))
        x = torch.flatten(x, 1)
        x = functional.leaky_relu(self.fc1(x))
        # MLP
        h = self.data_linear(torch.flatten(x0, 1))
        h = functional.leaky_relu(h) + x
        h = functional.leaky_relu(self.data_middle(h))
        h = self.data_final(h)
        output += h
        return output

    def zero_output(self):
        def init(x):
            torch.nn.init.uniform_(x, -1e-5, 1e-5)
        init(self.data_final.weight)
        init(self.data_final.bias)
        init(self.data_linear.weight)
        init(self.data_linear.bias)
        init(self.data_middle.weight)
        init(self.data_middle.bias)
        if hasattr(self, "fc1"):
            init(self.fc1.weight)
            init(self.fc1.bias)
        if hasattr(self, "conv1"):
            init(self.conv1.weight)
            init(self.conv1.bias)
        if hasattr(self, "conv2"):
            init(self.conv2.weight)
            init(self.conv2.bias)
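
# Note: with filter_size = 1 the "convolutional" path only mixes channels per
# square; its 2048-unit activation is summed with the flat MLP path before the
# scalar head. NetEngine below instantiates ChessConvNet; ChessResnet is
# currently unused.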


class NetEngine(MinimalEngine):
    def __init__(self, *args, name=None, net=None, from_file=DEFAULT_MODEL_LOCATION):
        super().__init__(*args)
        self.name = name
        blank_board = chess.Board()
        sample_vector = torch.from_numpy(get_features(blank_board).astype(np.float32)).view(1, -1, 8, 8)
        self.net = ChessConvNet()
        # self.net = ChessResnet()
        if from_file:
            self.net.load_state_dict(torch.load(from_file))
        else:
            self.net.eval()
            prev_score = self.net(sample_vector)
            self.net.zero_output()
            new_score = self.net(sample_vector)
            print(f"Set outputs of the initial network to be near zero. "
                  f"Initial board went from {prev_score.data.numpy()[0, 0]} to {new_score.data.numpy()[0, 0]}.")

    def search(self, board: chess.Board, time_limit, ponder, max_tolerance=0.1):
        # Returns a random choice among the highest-scoring moves. Child scores are
        # negated so they are expressed from the current player's perspective, since
        # both the material count and the network output are evaluated after the
        # candidate move is pushed (i.e. with the opponent to move).
        moves = np.array(list(board.legal_moves))
        features = []
        material_counts = np.zeros(moves.size)
        non_terminal = np.zeros(moves.size)
        for i, move in enumerate(moves):
            board.push(move)
            features.append(get_features(board))
            material_counts[i] = material_count(board)
            if not board.is_game_over():
                non_terminal[i] = 1
            board.pop()
        features = torch.from_numpy(np.asarray(features, dtype=np.float32))
        with torch.no_grad():
            scores = -non_terminal * self.net(features).numpy().ravel() - material_counts
        # consider scores within max_tolerance of the maximum to be equivalent
        best_moves = moves[scores.max() - scores <= max_tolerance]
        return np.random.choice(best_moves)


# def plot_filters(model: NetEngine, writer: SummaryWriter, step: int):
#     slices = ["My P", "My N", "My B", "My R", "My Q", "My K", "Op P", "Op N", "Op B", "Op R", "Op Q", "Op K", "Special"]
#
#     if not hasattr(model, "conv1"):
#         return
#
#     weight = model.conv1.weight
#
#     num_filters = weight.shape[0]
#
#     for i in range(len(slices)):
#         grid = make_grid(torch.tile(weight[:, i:(i + 1), :, :], (1, 3, 1, 1)), normalize=True)
#         writer.add_image(slices[i], grid, step)


def expand_state(board):
    """
    Generate a representation of a state for learning that includes:
    - the material count of the state
    - the feature representation of the state
    - the material counts for all child states
    - the feature representations of all child states
    - a non-terminal flag for each child state (0 if that child is checkmate, else 1)
    """
    features = get_features(board)
    material = material_count(board)
    next_features = []
    next_material = []
    next_non_terminal = []
    for move in board.legal_moves:
        board.push(move)
        next_features.append(get_features(board))
        next_material.append(material_count(board))
        next_non_terminal.append(0 if board.is_checkmate() else 1)
        board.pop()
    key = board._transposition_key()
    return key, (material, features, next_material, next_features, next_non_terminal)
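
# Each buffer entry therefore maps a transposition key to the tuple
# (material, features, next_material, next_features, next_non_terminal), with
# one "next" entry per legal move from the stored position.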


def update_buffer_bfs(buffer, engine_learner, max_depth=None):
    """
    Fills the buffer by breadth-first search from STARTING_POSITION.
    """
    if len(buffer) == 0:
        frontier = deque()
        frontier.append((chess.Board(STARTING_POSITION), 0))
        deepest = -1
        known_positions = set()
        # track visited positions by transposition key, matching the keys used below
        known_positions.add(chess.Board(STARTING_POSITION)._transposition_key())
        while frontier:
            board, curr_depth = frontier.popleft()
            key, data = expand_state(board)
            buffer[key] = data
            if max_depth and curr_depth >= max_depth:
                continue
            for move in board.legal_moves:
                new_board = copy.deepcopy(board)
                new_board.push(move)
                new_key = new_board._transposition_key()
                if new_key not in known_positions:
                    frontier.append((new_board, curr_depth + 1))
                    known_positions.add(new_key)
            if curr_depth > deepest:
                print(f"BFS reached depth {curr_depth}. Buffer size {len(buffer)}. Frontier size {len(frontier)}.")
                deepest = curr_depth
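
# With max_depth=None this enumerates every position reachable from
# STARTING_POSITION, so it is only practical for the small endgame positions
# listed near the top of the file; the training loop below uses the self-play
# and max-loss collectors instead.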


def update_buffer_self_play(buffer, engine_learner):
    # play a game against self
    for gn in range(NUM_GAMES):
        board = chess.Board(STARTING_POSITION)
        board.starting_fen = STARTING_POSITION
        while not board.is_game_over(claim_draw=False) and len(board.move_stack) < MAX_MOVES:
            if board._transposition_key() not in buffer:
                key, data = expand_state(board)
                buffer[key] = data
            if random.random() < 0.25:
                move = random.choice(list(board.legal_moves))
            else:
                move = engine_learner.search(board, 10000, 10000, max_tolerance=0.1)
            board.push(move)
        # commented out because there's no need to learn from terminal states
        # if board._transposition_key() not in buffer:
        #     key, data = expand_state(board)
        #     buffer[key] = data
        outcome_str = "mate" if board.is_checkmate() else "draw"
        print(f"Buffer size {len(buffer)}. Game {gn}: {outcome_str}.")


def update_buffer_max_loss(buffer, engine_learner):
    model = engine_learner.net
    for gn in range(NUM_GAMES):
        board = chess.Board(STARTING_POSITION)
        board.starting_fen = STARTING_POSITION
        while not board.is_game_over(claim_draw=False) and len(board.move_stack) < MAX_MOVES:
            if board._transposition_key() not in buffer:
                key, data = expand_state(board)
                buffer[key] = data
            chosen_move = None
            chosen_loss = 0
            moves = np.array(list(board.legal_moves))
            features = []
            material_counts = np.zeros(moves.size)
            non_terminal = np.zeros(moves.size)
            for i, move in enumerate(moves):
                board.push(move)
                features.append(get_features(board))
                material_counts[i] = material_count(board)
                if not board.is_game_over():
                    non_terminal[i] = 1
                board.pop()
            features = torch.from_numpy(np.asarray(features, dtype=np.float32))
            with torch.no_grad():
                scores = -non_terminal * model(features).numpy().ravel() - material_counts
                current_score = material_count(board)
                if not board.is_game_over():
                    current_score += model(torch.from_numpy(get_features(board).astype(np.float32))).numpy().ravel()[0]
                loss = np.abs(current_score - scores)
            best_moves = moves[loss.max() - loss < 1e-3]
            chosen_move = np.random.choice(best_moves)
            board.push(chosen_move)
        # commented out because there's no need to learn from terminal states
        # if board._transposition_key() not in buffer:
        #     key, data = expand_state(board)
        #     buffer[key] = data
        outcome_str = "mate" if board.is_checkmate() else "draw"
        print(f"Buffer size {len(buffer)}. Game {gn}: {outcome_str}.")


def main():
    time_string = datetime.now().strftime("%Y%m%d-%H%M%S")
    note = NOTE
    if STORE_LOG:
        base_dir = "/Users/bert/Desktop"
    else:
        base_dir = tempfile.mkdtemp()  # throwaway directory for logs that are not kept
    log_dir = '{}/logs/'.format(base_dir)
    print("Storing logs in {}".format(log_dir))
    writer = SummaryWriter(log_dir + note + time_string, flush_secs=1)
    epoch = 0
    start_time = time.perf_counter()
    if WARM_START:
        engine_learner = NetEngine(None, None, sys.stderr, from_file=DEFAULT_MODEL_LOCATION)
    else:
        engine_learner = NetEngine(None, None, sys.stderr, from_file=None)
    model = engine_learner.net
    opponent_random = RandomMove(None, None, sys.stderr)
    opponent_hanging = HangingEngine(None, None, sys.stderr)
    opponent_minimax = ScoreEngine(None, None, sys.stderr, max_depth=2)
    blank_board = chess.Board()
    sample_vector = get_features(blank_board)
    writer.add_graph(model, torch.Tensor(sample_vector))
    writer.flush()
    game_buffer = deque(maxlen=MAX_BUFFER_ROUNDS)
    total_optimizer_steps = 0
    # loss_fn = nn.HuberLoss()
    loss_fn = nn.MSELoss()
    # boards for evaluation
    empty = chess.Board()
    hanging_queen = chess.Board("rn1qkbnr/ppp1pppp/3p4/8/4P1b1/P1N5/1PPP1PPP/R1BQKBNR b KQkq - 0 3")
    ladder_mate = chess.Board("5rr1/1k6/8/8/7K/8/4R3/2R5 w - - 0 1")
    ladder_mate_in_one = chess.Board("5rr1/k7/8/8/7K/8/1R6/2R5 w - - 2 2")
    eval_boards = [empty, hanging_queen, ladder_mate, ladder_mate_in_one]
    eval_board_features = [get_features(x).astype(np.float32) for x in eval_boards]
    eval_board_features = torch.from_numpy(np.asarray(eval_board_features))
    eval_board_material = torch.from_numpy(np.asarray([material_count(x) for x in eval_boards]))
    empty_board_key, empty_board_data = expand_state(empty)
    empty_board_data = list(empty_board_data)
    empty_board_data[2] = []
    empty_board_data[3] = []
    # Anchor boards whose scores are regularized toward fixed values (the initial position toward 0)
    anchor_boards = [empty, ]
    anchor_board_values = torch.from_numpy(np.asarray([0.0], dtype=np.float32))
    anchor_board_features = [get_features(x).astype(np.float32) for x in anchor_boards]
    anchor_board_features = torch.from_numpy(np.asarray(anchor_board_features))
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    # optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=0.9)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100, verbose=True)
    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 1e-8, 1e-4, step_size_up=100000, cycle_momentum=False,
    #                                               verbose=False)
    eval_time = time.time()
    minimax_eval_time = time.time()
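    # Main training loop: new games are added to the buffer every EPOCHS epochs,
    # alternating between self-play and max-loss collection; each epoch then
    # regresses every buffered position's score toward its best child score, with
    # periodic evaluation matches against the baseline opponents above.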
    while True:
        # update engine name
        engine_learner.engine.id["name"] = "NetEngine{}".format(epoch)
        # generate some new positions to put in the buffer
        model.eval()
        if epoch % EPOCHS == 0:
            new_buffer = {}
            # alternate, per collection round, between a game played against self and a max-loss game
            if (epoch // EPOCHS) % 2 == 0:
                update_buffer_self_play(new_buffer, engine_learner)
            else:
                update_buffer_max_loss(new_buffer, engine_learner)
            game_buffer.append(new_buffer)
            buffer = {}
            for game in game_buffer:
                buffer.update(game)
            print(f"New games added. Full buffer combining {len(game_buffer)} rounds has {len(buffer)} states")
        if len(game_buffer) < MAX_BUFFER_ROUNDS:
            continue
        # Learning
        loss_record = 0
        if TARGET_UPDATE and epoch % TARGET_UPDATE == 0:
            target_model = copy.deepcopy(model)
            target_model.eval()
            # target_model = model
        if TARGET_UPDATE == 0:
            target_model = model
        batches = batchify(buffer.items(), BATCH_SIZE)
        num_steps = 0
        for batch in batches:
            # train on a batch
            optimizer.zero_grad()
            model.train()
            keys, data_list = zip(*batch)
            keys = list(keys)
            material, features, next_material, next_features, next_non_terminal = zip(*data_list)
            material = torch.from_numpy(np.asarray(material, dtype=np.float32)).view((-1, 1))
            next_material = [torch.from_numpy(np.asarray(m, dtype=np.float32)).view((-1, 1)) for m in next_material]
            features = torch.from_numpy(np.asarray(features, dtype=np.float32))
            next_features = [torch.from_numpy(np.asarray(x, dtype=np.float32)) for x in next_features]
            next_non_terminal = [torch.from_numpy(np.asarray(x, dtype=np.float32)).view((-1, 1))
                                 for x in next_non_terminal]
            current_scores = material + model(features)
            total_loss = ZERO_REGULARIZER * functional.l1_loss(current_scores, material)
            total_loss += ANCHOR_REGULARIZER * functional.l1_loss(model(anchor_board_features).view([-1]),
                                                                  anchor_board_values)
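            # TD-style target: each stored position's score (material + net) is
            # regressed toward the best child value, i.e. the max over legal moves of
            # -next_material - next_non_terminal * target_net(next_features),
            # a one-ply negamax backup computed with the target network below.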
            # collect next features and non-terminal statuses into a single batch
            batch_next_features = torch.cat(next_features)
            if batch_next_features.shape[0] <= 1:
                continue
            batch_next_scores = target_model(batch_next_features)
            batch_start_indices = np.cumsum([len(x) for x in next_material])
            batch_start_indices = np.concatenate(([0], batch_start_indices))
            for i in range(len(keys)):
                if len(next_material[i]):
                    next_scores = -next_material[i] - next_non_terminal[i] * \
                        batch_next_scores[batch_start_indices[i]:(batch_start_indices[i + 1])]
                    next_score = torch.max(next_scores)
                    # best_move = torch.argmax(next_scores)
                    # next_score = -next_material[i][best_move] - next_non_terminal[i][best_move] * target_model(
                    #     batch_next_features[batch_start_indices[i] + best_move])
                    # assert(torch.allclose(next_score, next_score_old))
                    loss = loss_fn(current_scores[i], next_score.view([1]))
                    total_loss += loss
                    margin_loss = functional.margin_ranking_loss(current_scores[i], next_scores, torch.ones((1, 1)))
                    total_loss += margin_loss
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            # scheduler.step()
            loss_record += total_loss.data
            num_steps += 1
            total_optimizer_steps += 1
            writer.add_scalar("Optimizer loss", total_loss.data / len(keys), total_optimizer_steps)
        model.eval()
        writer.add_scalar("Learning Loss", loss_record / len(buffer), epoch)
        # scheduler.step(loss_record)
        # scheduler.step()
        # log diagnostic info
        with torch.no_grad():
            eval_board_scores = model(eval_board_features).ravel() + eval_board_material
            writer.add_scalar("Empty Board Score", eval_board_scores[0], epoch)
            writer.add_scalar("Hanging Queen Score", eval_board_scores[1].data, epoch)
            writer.add_scalar("Ladder Mate Score", eval_board_scores[2].data, epoch)
            writer.add_scalar("Ladder Mate-in-1 Score", eval_board_scores[3].data, epoch)
            # eval_board_scores = target_model(eval_board_features).ravel() + eval_board_material
            # writer.add_scalar("Target Empty Board Score", eval_board_scores[0], epoch)
            # writer.add_scalar("Target Hanging Queen Score", eval_board_scores[1].data, step)
            # writer.add_scalar("Target Ladder Mate Score", eval_board_scores[2].data, step)
        if time.time() - eval_time > EVAL_INTERVAL:
            print("Evaluating")
            eval_time = time.time()
            num_games = 10
            rand_wins, rand_losses, rand_ties, rand_score = play_match(engine_learner, opponent_random, num_games,
                                                                       writer, "Random", epoch,
                                                                       starting_position=STARTING_POSITION)
            hang_wins, hang_losses, hang_ties, hang_score = play_match(engine_learner, opponent_hanging, num_games,
                                                                       writer, "Hanging", epoch,
                                                                       starting_position=STARTING_POSITION)
            play_match(engine_learner, engine_learner, num_games, writer, "Self", epoch,
                       starting_position=STARTING_POSITION)
            writer.add_scalar("Win Rate v. Random", rand_wins / num_games, epoch)
            writer.add_scalar("Loss Rate v. Random", rand_losses / num_games, epoch)
            writer.add_scalar("Win Rate v. Hanging", hang_wins / num_games, epoch)
            writer.add_scalar("Loss Rate v. Hanging", hang_losses / num_games, epoch)
            writer.add_scalar("Score v. Hanging", hang_score, epoch)
            writer.add_scalar("Score v. Random", rand_score, epoch)
            if STORE_LOG:
                torch.save(model.state_dict(), DEFAULT_MODEL_LOCATION)
            if time.time() - minimax_eval_time > EVAL_INTERVAL * 10:
                # compare against the slow minimax agent
                mini_wins, mini_losses, mini_ties, mini_score = play_match(engine_learner, opponent_minimax,
                                                                           num_games, writer, "Minimax", epoch,
                                                                           starting_position=STARTING_POSITION)
                minimax_eval_time = time.time()
                writer.add_scalar("Win Rate v. Minimax", mini_wins / num_games, epoch)
                writer.add_scalar("Loss Rate v. Minimax", mini_losses / num_games, epoch)
                writer.add_scalar("Score v. Minimax", mini_score, epoch)
        # plot_filters(model, writer, total_optimizer_steps)
        writer.add_scalar("Buffer Size", len(buffer), epoch)
        elapsed_time = time.perf_counter() - start_time
        epoch += 1
        print("Completed {} epochs ({:.2f} rounds/sec). Loss {}".format(epoch, epoch / elapsed_time,
                                                                        loss_record / len(buffer)))
        writer.flush()
        epoch_loss = loss_record / len(buffer)


if __name__ == "__main__":
    main()