-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_env.py
49 lines (36 loc) · 940 Bytes
/
test_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from source.environment import TakeItEasy
from source.agents.random import Random
# Smoke run: play a single TakeItEasy episode with the Random agent and
# print what the environment reports at every step.
seed = 42

# Build the environment; the same seed is also handed to the agent below.
env = TakeItEasy(seed=seed)

# Random policy, constructed from the environment's observation and action
# spaces.
agent = Random(env.obs_space, env.action_space, seed=seed)

# reset() supplies the initial (state, available, reward, done) tuple.
state, available, reward, done = env.reset()

# Report the initial situation as step 0, one value per line.
i = 0
print("Step ", i)
print(state, available, reward, done, sep="\n")
print("-" * 100)

while not done:
    # Ask the agent for an action given the current state and `available`.
    action = agent.policy(state, available)

    # Advance the environment by one step.
    # NOTE(review): the second return value is bound to `occupied` and is
    # never read, and `available` is not reassigned anywhere in this loop.
    # Unless the environment updates that object in place, the agent and the
    # printout below keep using the value obtained from reset() -- confirm
    # against the implementation of env.step().
    next_state, occupied, reward, done = env.step(action)

    # A learning agent would be updated at this point (e.g. agent.train(None));
    # afterwards the successor state becomes the current state.
    state = next_state

    # Report the step that was just taken (steps are numbered from 1 here).
    i += 1
    print("Step ", i)
    print(state, action, sep="\n")
    print(available, int(available.sum()))
    print(reward, done, sep="\n")
    print("-" * 100)

# Reward reported by the last step (or by reset() if the loop never ran).
print("Reward for random agent: ", reward)

# Show the final game state.
env.show_game_state()