-
Notifications
You must be signed in to change notification settings - Fork 0
/
dqn_evaluate.py
49 lines (39 loc) · 1.41 KB
/
dqn_evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""
Evaluate an agent that has been trained with the DQN algorithm to play the
game Pong for Atari 2600.

Loads a saved Q-network checkpoint, runs a fixed number of evaluation
episodes, and prints the mean and standard deviation of the per-episode
rewards.
"""
from dqn.dqn_agent import Agent
from dqn.pong_environment import PongFromFeatures
import numpy as np
import torch

NUMBER_OF_EVALUATION_EPISODES = 10
MAXIMUM_NUMBER_OF_STEPS_PER_EPISODE = 50000  # safety cap per episode
SHOW_RENDERING = True

environment = PongFromFeatures(SHOW_RENDERING)
environment.seed(42)  # fixed seed so evaluation runs are reproducible
print("State shape: ", environment.observation_space.shape)
print("Number of actions: ", environment.action_space.n)

agent = Agent(
    state_size=environment.observation_space.shape[0],
    action_size=environment.action_space.n,
    seed=0,
)
# map_location="cpu" lets a checkpoint saved on a GPU machine load on a
# CPU-only machine; without it torch.load raises a deserialization error
# when CUDA is unavailable.
agent.qnetwork_local.load_state_dict(
    torch.load("./dqn/saved_models/dqm_episode_9000.pth", map_location="cpu")
)

rewards_per_episode_list = []
for i in range(NUMBER_OF_EVALUATION_EPISODES):
    print("Simulating episode: ", i)
    state = environment.reset()
    total_episode_reward = 0
    for j in range(MAXIMUM_NUMBER_OF_STEPS_PER_EPISODE):
        action = agent.act(state)
        # Honor the rendering flag (rendering slows evaluation considerably);
        # with SHOW_RENDERING = True this matches the previous behavior.
        if SHOW_RENDERING:
            environment.render()
        state, reward, done, _ = environment.step(action)
        total_episode_reward += reward
        if done:  # episode ended before the step cap
            break
    rewards_per_episode_list.append(total_episode_reward)
environment.close()

rewards_per_episode_array = np.array(rewards_per_episode_list)
print("Average score: ", np.mean(rewards_per_episode_array))
print("Standard deviation of score: ", np.std(rewards_per_episode_array))