-
Notifications
You must be signed in to change notification settings - Fork 1
/
agents_pool.py
165 lines (148 loc) · 6.24 KB
/
agents_pool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#=======================================================
# agents_pool.py
# Created by: Adi Ben Binyamin
# Eran Amar
#========================================================
import random
import sys
import re
from action import Action
class Agent:
    '''
    Abstract base class for every agent in the pool.
    Concrete agents must override choose_act.
    '''
    def choose_act(self, state):
        '''
        Return a legal action for the given state.
        The base implementation always raises NotImplementedError.
        '''
        raise NotImplementedError()
class AlphaBetha(Agent):
    '''
    Agent that chooses actions with alpha-beta pruned minimax search.
    The leaf evaluation function comes from
    heuristic.get_evaluate_function(player_obj).
    '''
    # The searching depth of the alpha-beta. Note that the depth decrease only on opponents turns
    # therefor the actual depth in the game tree is TWICE the value of ALPHA_BETA_DEPTH.
    ALPHA_BETA_DEPTH = 2
    @staticmethod
    def __runAB(eval_func, state):
        '''
        Score every legal root action with alpha-beta and return one of the
        best-scoring actions, chosen uniformly at random among ties.
        '''
        acts_res = []
        for act in state.get_legal_actions():
            successor_state = state.generate_successor(act)
            acts_res.append((act, AlphaBetha.__min_val_ab(eval_func, successor_state, AlphaBetha.ALPHA_BETA_DEPTH)))
        _, best_val = max(acts_res, key=lambda x: x[1])
        return random.choice([best_action for best_action, val in acts_res if val == best_val])
    @staticmethod
    def __min_val_ab(eval_func, state, depth, alpha=-float('inf'), beta=float('inf')):
        '''
        Minimizing (opponent) node of the search.
        float('inf') is used instead of sys.maxint: it works on both
        Python 2 and 3 (sys.maxint was removed in Python 3) and is a true
        bound for any finite evaluation value.
        '''
        if AlphaBetha.__terminal_test(state, depth):
            return eval_func(state)
        val = float('inf')
        for act in state.get_legal_actions():
            successor_state = state.generate_successor(act)
            # Depth budget is consumed only here, on the opponent's move
            # (see the ALPHA_BETA_DEPTH comment above).
            val = min(val, AlphaBetha.__max_val_ab(eval_func, successor_state, depth - 1, alpha, beta))
            if val <= alpha:
                return val  # prune: the maximizer already has a better option
            beta = min(beta, val)
        return val
    @staticmethod
    def __max_val_ab(eval_func, state, depth, alpha=-float('inf'), beta=float('inf')):
        '''
        Maximizing (our) node of the search.
        '''
        if AlphaBetha.__terminal_test(state, depth):
            return eval_func(state)
        val = -float('inf')
        for act in state.get_legal_actions():
            successor_state = state.generate_successor(act)
            # Depth is intentionally NOT decremented on our own move.
            val = max(val, AlphaBetha.__min_val_ab(eval_func, successor_state, depth, alpha, beta))
            if val >= beta:
                return val  # prune: the minimizer already has a better option
            alpha = max(alpha, val)
        return val
    @staticmethod
    def __terminal_test(state, depth):
        # Stop at a real game end or when the depth budget is exhausted.
        return state.is_terminal() or (depth == 0)
    def __init__(self, heuristic, player_obj):
        self._eval_func = heuristic.get_evaluate_function(player_obj)
    def choose_act(self, state):
        '''Return the alpha-beta choice for the given state.'''
        return AlphaBetha.__runAB(self._eval_func, state)
class GreedyAgent(Agent):
    '''
    Picks the action whose immediate successor state scores highest under the
    evaluation function, WITHOUT any lookahead into later turns. Ties are
    broken uniformly at random.
    '''
    def __init__(self, heuristic, player_obj):
        self._eval_func = heuristic.get_evaluate_function(player_obj)
    def choose_act(self, state):
        '''Return one of the greedily best legal actions for the state.'''
        scored = [(act, self._eval_func(state.generate_successor(act)))
                  for act in state.get_legal_actions()]
        best_val = scored[0][1]
        for _, score in scored:
            if score > best_val:
                best_val = score
        candidates = [act for act, score in scored if score == best_val]
        return random.choice(candidates)
class ReflexAgent(Agent):
    '''
    Always plays the first legal action found, with no evaluation at all.
    '''
    def choose_act(self, state):
        '''Return the first entry of the state's legal-action list.'''
        actions = state.get_legal_actions()
        return actions[0]
class RandomAgent(Agent):
    '''
    Chooses uniformly at random among the available legal actions.
    '''
    def choose_act(self, state):
        '''Return a random legal action for the state.'''
        legal = state.get_legal_actions()
        return random.choice(legal)
class HumanAgent(Agent):
    '''
    Agent that asks the user (via stdin) to choose a valid move.
    The user gets 10 attempts; after 10 invalid inputs the program exits with
    status 1. Entering 'quit' or 'q' exits with status 0.
    NOTE: uses raw_input and print statements, i.e. Python 2 code.
    '''
    def choose_act(self, state):
        '''
        Prompt the user for a board coordinate "r, c" (each digit 0-8) and
        return the corresponding legal Action.
        '''
        # Accepts two digits 0-8 with optional spaces and an optional comma
        # between them, e.g. "3,4", "3 4", "34".
        patt = r'[ ]*[0-8][ ]*,?[ ]*[0-8][ ]*'
        legal_acts = state.get_legal_actions()
        for attempt in range(10):
            try:
                user_in = raw_input(
                    "You are the %s player. Enter input: " % state.get_player()).strip()
                if user_in.lower() in ["quit", "q"]:
                    print 'Terminating by request...'
                    exit(0)
                if re.match(patt, user_in):
                    # After strip(), the first and last characters are the two
                    # digits (only spaces/comma may appear between them).
                    user_raw, user_col = (int(user_in[0]), int(user_in[-1]))
                    # Map the 9x9 board coordinate to (mini-board, inner-cell)
                    # indices; '/' here is Python 2 integer (floor) division.
                    miniB_index = (user_raw / 3) * 3 + user_col / 3
                    inner_index = (user_raw % 3) * 3 + user_col % 3
                    act = Action(miniB_index, inner_index)
                    if not act in legal_acts:
                        print 'The move is illegal!'
                    else:
                        return act
                else:
                    print 'The input does not match the required pattern: %s' % patt
            except Exception as e:
                # Best-effort: any parsing/IO error just consumes one attempt.
                print 'Exception:', e
            # Reached on every failed attempt; shows remaining tries.
            print "Either enter a coordinate in the form 'r, c' where r and c are" \
                " integers between 0 and 8, or enter 'quit' or 'q' to stop the game. Tries left: %d" % (9-attempt)
        print "User failed to enter valid input 10 times. Stopping the game."
        exit(1)
class GenericRandomJumpAgent(Agent):
    '''
    Decorator agent: with some probability it "jumps", i.e. plays a uniformly
    random legal action; otherwise it delegates to the wrapped agent.
    The jump probability is random_jump_formula(counter), where the counter
    starts at INITIAL_COUNTER and grows by 0.1 on every decision.
    '''
    INITIAL_COUNTER = 3.0
    def __init__(self, another_agent_obj, random_jump_formula):
        '''
        another_agent_obj: the agent consulted whenever no jump occurs.
        random_jump_formula: function mapping a number to the probability of
        jumping at that number (i.e. a rational in [0, 1]).
        '''
        self.counter = GenericRandomJumpAgent.INITIAL_COUNTER
        self._agent = another_agent_obj
        self._probability_formula = random_jump_formula
    def choose_act(self, state):
        '''Either jump to a random legal action or defer to the inner agent.'''
        if self.__to_jump():
            return random.choice(state.get_legal_actions())
        return self._agent.choose_act(state)
    def get_counter(self):
        '''Current counter value, truncated to an int.'''
        return int(self.counter)
    def reset_counter(self):
        '''Restart the counter at its initial value.'''
        self.counter = GenericRandomJumpAgent.INITIAL_COUNTER
    def get_probability_formula(self):
        return self._probability_formula
    def __to_jump(self):
        '''
        Flip a biased coin: return True (jump) with probability
        self._probability_formula(self.counter). Every call advances the
        counter by 0.1, regardless of the outcome.
        '''
        decision = random.random() < self._probability_formula(self.counter)
        self.counter += 0.1
        return decision