-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
81 lines (74 loc) · 3.23 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import time
import numpy as np
from getkeys import key_check
# Hyperparameters shared with the training script.
# NOTE(review): none of these are referenced by test() below — presumably kept
# for parity with the training code; confirm before removing.
num_actions = 8  # size of the action space the model chooses from
max_memory = 500 # Maximum number of experiences we are storing
hidden_size = 512 # Size of the hidden layers
batch_size = 1 # Number of experiences we use for training per batch
def test(game, model, n_games, verbose=1):
    """Play `n_games` games with a trained model and track its results.

    Parameters
    ----------
    game : object
        Environment exposing reset(), observe(), and act(action); act()
        returns (next_state, reward, game_over, finish_rank).
    model : object
        Trained model exposing predict(state) -> array of Q-values.
    n_games : int
        Number of games to play.
    verbose : int, optional
        If > 0, print a progress line after each game.

    Returns
    -------
    tuple[list[int], list[int]]
        (win_hist, ranks): cumulative win count after each game, and the
        finishing rank per game (-1 when finish_rank == 10, i.e. a loss).
        Also returned (partial) when the user quits early with 'Q'.
    """
    # Resetting the win counter
    win_cnt = 0
    # We want to keep track of the progress of the AI over time, so we save its win count history
    win_hist = []
    ranks = []
    # Epochs is the number of games we play
    for e in range(n_games):
        # Resetting the game
        game.reset()
        game_over = False
        # get tensorflow running first to acquire cudnn handle
        input_t = game.observe()
        if e == 0:
            # Start paused so the user can get the game window ready.
            paused = True
            print('Testing is paused. Press X once game is loaded and is ready to be played.')
        else:
            paused = False
        while not game_over:
            if not paused:
                # The learner is acting on the last observed game screen
                # input_t is a vector representing the game screen
                input_tm1 = input_t
                # q contains the expected rewards for the actions
                q = model.predict(input_tm1)
                # We pick the action with the highest expected reward (greedy policy;
                # no epsilon-exploration at test time).
                action = np.argmax(q[0])
                # apply action, get rewards and new state
                input_t, reward, game_over, finish_rank = game.act(action)
                if game_over:
                    # finish_rank of 10 is treated as a loss; anything else counts as a win.
                    if finish_rank != 10:
                        win_cnt += 1
                        ranks.append(finish_rank)
                    else:
                        ranks.append(-1)
            # menu control: X toggles pause, Q quits early.
            keys = key_check()
            if 'X' in keys:
                if paused:
                    paused = False
                    print('unpaused!')
                    time.sleep(1)
                else:
                    print('Pausing!')
                    paused = True
                    time.sleep(1)
            elif 'Q' in keys:
                print('Quitting!')
                # BUGFIX: was a bare `return` (None), which crashed callers that
                # unpack the result. Return the partial history instead.
                return win_hist, ranks
        if verbose > 0:
            print("Game {:03d}/{:03d} | Win count {}".format(e+1, n_games, win_cnt))
        win_hist.append(win_cnt)
    return win_hist, ranks