-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
81 lines (74 loc) · 3.23 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import time
import numpy as np
from getkeys import key_check
# Hyperparameters shared with the training script.
# NOTE(review): none of these are referenced by test() below — presumably kept
# for parity with the training code; confirm before removing.
num_actions = 8  # size of the action space the model chooses from
max_memory = 500 # Maximum number of experiences we are storing
hidden_size = 512 # Size of the hidden layers
batch_size = 1 # Number of experiences we use for training per batch
def test(game, model, n_games, verbose=1):
    """Play `n_games` games with a trained model and track its results.

    Parameters
    ----------
    game : object
        Environment exposing reset(), observe(), and act(action); act()
        returns (next_state, reward, game_over, finish_rank).
    model : object
        Trained model exposing predict(state) -> array of Q-values.
    n_games : int
        Number of games to play.
    verbose : int, optional
        If > 0, print a progress line after each game.

    Returns
    -------
    tuple[list[int], list[int]]
        (win_hist, ranks): cumulative win count after each game, and the
        finishing rank per game (-1 when finish_rank == 10, i.e. a loss).
        Also returned (partial) when the user quits early with 'Q'.
    """
    # Resetting the win counter
    win_cnt = 0
    # We want to keep track of the progress of the AI over time, so we save its win count history
    win_hist = []
    ranks = []
    # Epochs is the number of games we play
    for e in range(n_games):
        # Resetting the game
        game.reset()
        game_over = False
        # get tensorflow running first to acquire cudnn handle
        input_t = game.observe()
        if e == 0:
            # Start paused so the user can get the game window ready.
            paused = True
            print('Testing is paused. Press X once game is loaded and is ready to be played.')
        else:
            paused = False
        while not game_over:
            if not paused:
                # The learner is acting on the last observed game screen
                # input_t is a vector representing the game screen
                input_tm1 = input_t
                # q contains the expected rewards for the actions
                q = model.predict(input_tm1)
                # We pick the action with the highest expected reward (greedy policy;
                # no epsilon-exploration at test time).
                action = np.argmax(q[0])
                # apply action, get rewards and new state
                input_t, reward, game_over, finish_rank = game.act(action)
                if game_over:
                    # finish_rank of 10 is treated as a loss; anything else counts as a win.
                    if finish_rank != 10:
                        win_cnt += 1
                        ranks.append(finish_rank)
                    else:
                        ranks.append(-1)
            # menu control: X toggles pause, Q quits early.
            keys = key_check()
            if 'X' in keys:
                if paused:
                    paused = False
                    print('unpaused!')
                    time.sleep(1)
                else:
                    print('Pausing!')
                    paused = True
                    time.sleep(1)
            elif 'Q' in keys:
                print('Quitting!')
                # BUGFIX: was a bare `return` (None), which crashed callers that
                # unpack the result. Return the partial history instead.
                return win_hist, ranks
        if verbose > 0:
            print("Game {:03d}/{:03d} | Win count {}".format(e+1, n_games, win_cnt))
        win_hist.append(win_cnt)
    return win_hist, ranks