-
Notifications
You must be signed in to change notification settings - Fork 0
/
test1.py
89 lines (72 loc) · 3.54 KB
/
test1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from abc import abstractmethod
import os
import random
import numpy as np
import tqdm
from game import Game, MonteCarloPlayer_classic, Move, Player
from board import Board
from tree import MonteCarloTreeSearchNode
class RandomPlayer(Player):
    """Baseline opponent: proposes a uniformly random move each turn."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random (col, row) on the 5x5 board and a random slide direction."""
        col = random.randint(0, 4)
        row = random.randint(0, 4)
        direction = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        return (col, row), direction
## MonteCarlo First Visit approach
class MonteCarloPlayer_classic(Player):
    """Player that selects its move via Monte Carlo Tree Search.

    NOTE(review): this class shadows the `MonteCarloPlayer_classic` imported
    from `game` at the top of the file — confirm which definition is the
    intended one and remove the other.
    """

    def __init__(self, root: MonteCarloTreeSearchNode, player_id, num_simulations=100, c_param=0.1) -> None:
        # BUGFIX: call the base-class initializer (RandomPlayer does; this class did not).
        super().__init__()
        self.root = root  # initial tree root; make_move rebuilds a fresh root from the live board
        self.num_simulations = num_simulations  # rollouts per decision
        self.c_param = c_param  # UCT exploration constant
        self.player_id = player_id  # which side (0 or 1) this player controls

    # BUGFIX: removed a stray @abstractmethod decorator — this is a concrete
    # implementation in a concrete subclass; the decorator was misleading
    # (harmless only because Player is instantiated normally, not via ABCMeta checks here).
    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Run MCTS from the current board state and return (from_pos, move).

        from_pos is (col, row); move is the chosen slide direction.
        """
        root = MonteCarloTreeSearchNode(Board(game.get_board()), player_id=self.player_id, d=0, root_player=self.player_id, id=0, num_simulations=self.num_simulations, c_param=self.c_param)
        selected_node = root.best_action()
        from_pos, move = selected_node.parent_action
        return from_pos, Move(move.value)
if __name__ == '__main__':
    # Boards from lost games, kept for post-mortem analysis.
    losing_board = []
    # Maps a hyperparameter configuration string -> accuracy (%).
    results = {}
    my_player_id = 0
    players = np.empty(2, dtype=Player)
    tot = 100  # games played per hyperparameter configuration
    # cross validation backbone to find best hyperparameters
    # this below is the best configuration found if we consider a performance/execution_time tradeoff
    for ns in [100]:  # number of MCTS simulations per decision
        for cp in [0.1]:  # UCT exploration constant
            for cp2 in [0.1]:  # second exploration constant (currently not passed to the player — review)
                # wins and matches for accuracy
                wins = 0
                matches = 0
                # play tot games
                for i in tqdm.tqdm(range(tot)):
                    # Randomize which side our player takes to avoid first-move bias.
                    my_player_id = random.randint(0, 1)
                    print(f"my_player_id: {my_player_id}")
                    g = Game()
                    # player initialization -> our player is players[my_player_id]
                    root = MonteCarloTreeSearchNode(state=Board(), player_id=my_player_id, d=0, id=0, root_player=my_player_id, num_simulations=ns, c_param=cp)
                    players[my_player_id] = MonteCarloPlayer_classic(root=root, player_id=my_player_id, num_simulations=ns, c_param=cp)
                    players[1 - my_player_id] = RandomPlayer()
                    # play the game
                    winner = g.play(players[0], players[1])
                    g.print()
                    print(f"Winner: Player {winner}")
                    matches += 1
                    # update accuracy
                    if winner == my_player_id:
                        wins += 1
                    else:
                        losing_board.append(g.get_board())
                    print(f"Vinte {wins} partite su {matches} : cp :{cp} e cp2:{cp2} accuracy = {100*float(wins)/float(matches)}%")
                # print accuracy
                acc = 100 * float(wins) / float(matches)
                print(f"cp :{cp} e cp2:{cp2} -> accuracy: ", acc)
                # BUGFIX: include cp2 in the key — the old key (ns-cp only) would
                # silently overwrite results across different cp2 values, defeating
                # the cross-validation sweep this scaffolding exists for.
                results[str(ns) + "-" + str(cp) + "-" + str(cp2)] = acc