-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathai_base.py
218 lines (183 loc) · 7.47 KB
/
ai_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
'''
This is the AI base module. It contains:
- `SystemState`: a data structure capturing the one-ring vision of
    the snake.
- `DecayingFloat`: a number convertible to `float` that can be decayed
    step by step.
- `AI_Base`: the base class for all AI players. It specifies the interface
    that a subclass must implement so that its own algorithm can plug
    seamlessly into the environment.
'''
from abc import ABC, abstractmethod
from snake import GameOutcome
class SystemState:
    '''
    Snapshot of the snake's one-ring vision.

    Besides the cells immediately surrounding the snake, the state also
    records where the food is relative to the snake and the direction the
    snake is currently moving in.

    Attributes
    ----------
    food_north, food_south, food_east, food_west : bool
        Flags marking the position of the food relative to the snake.
        All start as False.
    obj_north, obj_south, obj_east, obj_west : int
    obj_north_east, obj_north_west, obj_south_east, obj_south_west : int
        Obstacle markers for the eight cells around the snake. All start
        as 0. NOTE(review): the meaning of the integer codes is set by
        the environment and is not visible in this module.
    dir_x, dir_y : int
        The current movement of the snake. Both start as 0.
    '''

    def __init__(self):
        ## food position relative to the snake
        self.food_north = self.food_south = False
        self.food_east = self.food_west = False

        ## obstacles in the one-ring around the snake: the four sides
        ## first, then the four corners
        self.obj_north = self.obj_south = 0
        self.obj_east = self.obj_west = 0
        self.obj_north_east = self.obj_north_west = 0
        self.obj_south_east = self.obj_south_west = 0

        ## current movement of the snake
        self.dir_x = self.dir_y = 0
class DecayingFloat:
    '''
    This class provides a decaying floating number. It can be converted
    with `float()` and provides methods to trigger a decay.

    The constructor takes the following inputs parameters.

    Parameters:
        value : float
            The initial value of the decaying float number. We assume it
            is a positive value.
        factor : float, optional, default=None
            The decaying factor. If None is specified, the float number will
            not decay.
        minval : float, optional, default=None
            The minimum value of the float. If None is specified, the float
            number can reach zero which is the lowest.
        mode : str
            It can be either "exp" for exponential decaying (the value is
            multiplied by `factor`) or "linear" for linear decaying
            (`factor` is subtracted from the value). An unrecognized
            string will cause the value not to decay.
    '''

    def __init__(self, value:float, factor:float=None, minval:float=None,
                 mode:str="exp"):
        self.init = value      # remembered so that reset() can restore it
        self.value = value
        self.factor = factor
        self.minval = minval
        self.mode = mode

    def __float__(self) -> float:
        '''
        This method performs the type casting operation to return a float.
        '''
        return float(self.value)

    def reset(self):
        '''
        To start over the decaying function from the beginning.
        '''
        self.value = self.init

    def decay(self):
        '''
        To perform a step of decay. The decaying depends on the `factor`
        and the `mode`. If `factor` is not given or `mode` string is
        unrecognized, the method simply does nothing.

        After the step, the value is clamped so that it never drops below
        `minval`, or below zero when `minval` is not given.
        '''
        if self.factor is None:
            return

        if self.mode == "exp":
            self.value *= self.factor
        elif self.mode == "linear":
            self.value -= self.factor
        else:
            # unrecognized mode: documented as a no-op, so leave the
            # value completely untouched (no clamping either)
            return

        # Without an explicit minimum, zero is the documented floor; this
        # keeps a linear decay from running into negative values.
        floor = 0.0 if self.minval is None else self.minval
        if self.value < floor:
            self.value = floor
class AI_Base(ABC):
    '''
    Base class that every AI player derives from.

    A subclass has to provide these two abstract methods:
    - callback_take_action(): invoked when the environment asks the
        agent for its next action given the current system state.
    - callback_action_outcome(): invoked right after the environment
        has carried out the last action, reporting the new state and
        the outcome.

    A subclass may additionally override:
    - callback_terminating(): invoked when the program is about to
        exit, so the algorithm can print final statistics or save
        data before the program ends.
    '''

    def __init__(self):
        self._name = "Human Player"
        self._state: SystemState = None

    def get_name(self) -> str:
        '''Tell the name of this AI algorithm.

        Returns
        -------
        str
            The name of this AI algorithm.
        '''
        return self._name

    def state_str(self, state:SystemState) -> str:
        '''Build the string representation of the system state
        observed by this player.

        The string has three parts: the food flags in the order east,
        south, west, north (a marker character when set, a blank
        otherwise), the obstacle values in the order north, south, east,
        west, and a single letter for the current heading.

        Parameters
        ----------
        state : SystemState
            The system state to render.

        Returns
        -------
        str
            The string representation of the system state.
        '''
        food_markers = (
            (">", state.food_east),
            ("v", state.food_south),
            ("<", state.food_west),
            ("^", state.food_north),
        )
        food_part = "".join(
            marker if present else " " for marker, present in food_markers
        )

        ## NOTE(review): the last field is formatted with %d while the
        ## other three use %+d; kept exactly as is to preserve the output.
        obstacle_part = "[%+d,%+d,%+d,%d]" % (
            state.obj_north, state.obj_south, state.obj_east, state.obj_west
        )

        ## the vertical direction is checked first; "R" is the fallback
        ## for every remaining case
        if state.dir_y == -1:
            heading = "U"
        elif state.dir_y == 1:
            heading = "D"
        elif state.dir_x == -1:
            heading = "L"
        else:
            heading = "R"

        return "[" + food_part + "]," + obstacle_part + "-%s" % heading

    def is_keyboard_allowed(self) -> bool:
        '''Tell whether this AI algorithm accepts keyboard input. It is
        not allowed by default, so the user cannot use the keyboard to
        interfere with the decision made by this algorithm.

        Returns
        -------
        bool
            Whether keyboard is allowed to interfere with the decision
            made by the algorithm.
        '''
        return False

    ## callback when a request for action is needed by the environment
    @abstractmethod
    def callback_take_action(self, state:SystemState) -> (int,int):
        '''Callback of the algorithm for when an action is needed.
        The environment calls this method periodically.

        This is an abstract method and should be reimplemented in the
        subclass.

        Parameters
        ----------
        state : SystemState
            The current system state of the environment.

        Returns
        -------
        Tuple (int,int)
            The next move of the snake as instructed by the algorithm.
            The first element is the x-direction (either -1,0,1 for
            left,none,right) and the second element is the y-direction
            (either -1,0,1 for up,none,down). By the rule the snake
            cannot move diagonally, so at least one of the elements
            must be zero.
        '''
        ## if called accidentally, simply keep the same movement
        ## as the previous state
        return (state.dir_x, state.dir_y)

    ## callback when the outcome for the last action is available
    @abstractmethod
    def callback_action_outcome(self, state:SystemState, outcome:GameOutcome):
        '''Callback used by the environment to report the outcome of an
        action made previously by the algorithm.

        This is an abstract method and should be reimplemented in the
        subclass.

        Parameters
        ----------
        state : SystemState
            The current system state of the environment.
        outcome : GameOutcome
            The outcome of the action made previously.
        '''

    def callback_terminating(self):
        '''Callback invoked when a termination signal is triggered in
        the environment. It gives the algorithm a chance to perform any
        final processing before the game ends, for example saving some
        learned data.

        It does nothing by default. A subclass may override this method
        to provide its own final processing.
        '''