-
Notifications
You must be signed in to change notification settings - Fork 0
/
CartPoleAI.py
57 lines (42 loc) · 1.44 KB
/
CartPoleAI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import random
from Brain import Brain
from RingMemory import RingMemory
import numpy as np
# Discount factor: CartPoleAI.learn multiplies the best predicted next-state
# value by G before adding it to the reward.
G = 0.99
# Size argument handed to RingMemory (presumably its capacity -- confirm
# against RingMemory's implementation).
MEMORY_SIZE = 10000
# Initial exploration probability (epsilon) assigned in CartPoleAI.__init__.
STARTING_EPS = 0.99
# Factor epsilon is multiplied by on every add_experience call when decay is
# enabled.
EPSILON_DECAY = 0.99
class CartPoleAI:
    """Epsilon-greedy Q-learning agent backed by an experience-replay memory.

    Experiences are indexed positionally throughout this class:
    ``exp[0]`` is the state, ``exp[1]`` the index of the action taken,
    ``exp[2]`` the next state (``None`` when there is no successor state,
    presumably because the episode ended) and ``exp[3]`` the reward.
    """

    def __init__(self, actions_dim, state_dim, epsilon_decay=True):
        """Create the agent.

        Args:
            actions_dim: Number of discrete actions available.
            state_dim: Number of components in a state vector.
            epsilon_decay: When true, the exploration rate shrinks by
                ``EPSILON_DECAY`` every time an experience is recorded.
        """
        self.eps = STARTING_EPS
        self.adim = actions_dim
        self.sdim = state_dim
        self.epsdecay = epsilon_decay
        self.brain = Brain(self.adim, self.sdim)
        self.experiences = RingMemory(MEMORY_SIZE)

    def move(self, s):
        """Choose an action for state ``s`` with an epsilon-greedy policy.

        With probability ``self.eps`` a uniformly random action index is
        returned; otherwise the index of the largest entry in the first row
        of ``self.brain.choose_action(s)``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if random.random() < self.eps:
            return random.randint(0, self.adim - 1)
        action_values = self.brain.choose_action(s)
        # np.argmax yields a NumPy integer; convert so that both branches
        # hand the caller the same built-in int type.
        return int(np.argmax(action_values[0]))

    def add_experience(self, exp):
        """Store ``exp`` in the replay memory and, if enabled, decay epsilon."""
        self.experiences.add(exp)
        if self.epsdecay:
            self.eps = self.eps * EPSILON_DECAY

    def learn(self, batch_size=64):
        """Train the brain on one batch sampled from the replay memory.

        For every sampled experience the brain's current prediction for the
        state is used as the target, except for the entry of the action that
        was taken, which is replaced by the reward (no next state) or by
        ``reward + G * max(prediction for next state)``.

        Args:
            batch_size: Number of experiences requested from the memory.
                Defaults to 64, the value that was previously hard-coded.
        """
        batch = self.experiences.sample(batch_size)
        if len(batch) == 0:
            # Nothing recorded yet: skip instead of feeding an empty batch
            # to the brain.
            return

        states = [exp[0] for exp in batch]
        # Experiences without a next state still need a placeholder so the
        # prediction batch keeps one row per experience. The placeholder
        # copies the shape of the experience's own state so every element of
        # the batch has the same shape; its prediction is never read below.
        next_states = [
            exp[2] if exp[2] is not None else np.zeros_like(exp[0])
            for exp in batch
        ]
        pred = self.brain.batch_prediction(states)
        pred_ = self.brain.batch_prediction(next_states)

        x = np.zeros((len(batch), self.sdim))
        y = np.zeros((len(batch), self.adim))
        for i, exp in enumerate(batch):
            target = pred[i]
            if exp[2] is None:
                # No successor state: the target is just the reward.
                target[exp[1]] = exp[3]
            else:
                target[exp[1]] = exp[3] + G * np.amax(pred_[i])
            x[i] = exp[0]
            y[i] = target
        self.brain.train(x, y)