-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlearn_highlow.py
85 lines (72 loc) · 2.55 KB
/
learn_highlow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import random
from card_env import Highlow
import numpy as np
class Agent():
    """Epsilon-greedy agent that learns action values for the high/low game.

    The agent keeps a table ``V`` with one row per state and one column per
    action. Actions are used directly as column indices into ``V``, so they
    are presumably the integers ``0..len(actions)-1`` -- confirm against the
    environment's ``actions()``.
    """

    def __init__(self, env, epsilon, num_states=13):
        """Create an agent with an all-zero value table.

        Args:
            env: Environment object; only ``env.actions()`` is called here.
            epsilon: Probability of picking a random action in ``policy``.
            num_states: Number of rows in the value table. Defaults to 13,
                matching the 1~13 -> 0~12 state mapping used in ``play``.
        """
        self.actions = env.actions()
        self.epsilon = epsilon
        self.num_len = num_states
        self.V = np.zeros((self.num_len, len(self.actions)))

    def policy(self, state):
        """Return the action to take in ``state``.

        With probability ``epsilon`` a random element of ``self.actions`` is
        returned; otherwise the index of the largest value in ``V[state]``
        (the first one on ties, per ``np.argmax``) as a plain ``int``.
        """
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        # print(state, self.V[state], np.argmax(self.V[state]))
        return int(np.argmax(self.V[state]))

    def play(self, env):
        """Play one episode on ``env`` and fold its rewards into ``self.V``.

        Rewards are accumulated per (state, action) in an episode-local table
        and merged into ``self.V`` only once the episode is over, so
        ``policy`` sees the values from before this episode for all of its
        decisions.
        NOTE(review): the merge is assumed to run once after the loop rather
        than on every step -- confirm against the original file.

        Args:
            env: Environment providing ``reset()``, ``draw()``, ``top`` and
                ``step(action) -> (next_state, reward, done)``.

        Returns:
            The list of rewards received, in step order.
        """
        episode_values = np.zeros((self.num_len, len(self.actions)))

        # Initialize position of agent.
        env.reset()
        env.draw()
        done = False
        rewards = []
        state = env.top - 1  # 1~13 --> 0~12
        while not done:
            action = self.policy(state)
            next_state, reward, done = env.step(action)
            rewards.append(reward)
            episode_values[state, action] += reward
            state = next_state - 1  # 1~13 --> 0~12

        # Rebind rather than update in place, so an earlier reference to the
        # old table is left unchanged.
        self.V = self.V + episode_values
        return rewards
def main():
    """Train one agent per epsilon value and plot the mean episode rewards.

    For each epsilon in a fixed list, a fresh ``Agent`` plays 30 episodes on
    a shared ``Highlow`` environment, with ``env.steps`` set to 10, 20, ...,
    300 in turn (presumably the number of draws per episode -- confirm
    against ``card_env``). The mean reward of every episode is printed and
    collected, and the result is saved as a line chart to
    ``./Img/result.png``.
    """
    # Imported here so that main() also works when this module is imported;
    # at module level these names are only bound under the __main__ guard.
    import os

    import matplotlib.pyplot as plt
    import pandas as pd

    env = Highlow()
    epsilon = [0.0, 0.3, 0.6, 0.9]
    game_steps = list(range(10, 310, 10))
    result = {}
    for e in epsilon:
        agent = Agent(env, e)
        means = []
        # One episode per entry of game_steps (30 episodes per agent).
        for i in game_steps:
            env.steps = i
            rewards = agent.play(env)
            mean_reward = np.mean(rewards)
            means.append(mean_reward)
            print("Episode {}: Agent gets {} reward.".format(i, mean_reward))
        result["epsilon={}".format(e)] = means

    result["draw_count"] = game_steps
    result = pd.DataFrame(result)
    result.set_index("draw_count", drop=True, inplace=True)
    result.plot.line(figsize=(10, 5))
    # savefig does not create missing directories.
    os.makedirs("./Img", exist_ok=True)
    plt.savefig("./Img/result.png")
if __name__ == "__main__":
    # Script entry point. The Agg backend is selected before pyplot is
    # imported so the figure can be written to a file without a display.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    # pd and plt are bound at module scope for code that looks them up as
    # globals.
    import pandas as pd

    main()