Skip to content

Commit 3fc2006

Browse files
committed
Minor changes in examples to correspond with respective train.py scripts.
1 parent 8b8f9af commit 3fc2006

File tree

6 files changed

+331
-105
lines changed

6 files changed

+331
-105
lines changed

breakout/results/example.ipynb

Lines changed: 42 additions & 38 deletions
Large diffs are not rendered by default.

breakout/train.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,9 @@
5454
# OPTIMIZER : A dictionary defining the optimizer used in training.
5555
# METRICS : The file path where the metrics are saved.
5656

57-
GAMES = 10000
58-
SKIP = 6
59-
CHECKPOINT = 1000
57+
GAMES = 25000
58+
SKIP = 4
59+
CHECKPOINT = 2500
6060

6161
SHAPE = {
6262
"original": (1, 1, 210, 160),
@@ -66,34 +66,34 @@
6666

6767
DISCOUNT = 0.95
6868
GAMMA = 0.99
69-
GRADIENTS = (-10, 10)
69+
GRADIENTS = (-1, 1)
7070

7171
PUNISHMENT = -1
72-
INCENTIVE = 1
72+
INCENTIVE = 3
7373

7474
MINIBATCH = 32
7575
TRAIN_EVERY = 1
7676

7777
EXPLORATION_RATE = 1.0
78-
EXPLORATION_MIN = 0.005
79-
EXPLORATION_STEPS = 8000 // TRAIN_EVERY
78+
EXPLORATION_MIN = 0.08
79+
EXPLORATION_STEPS = 5000 // TRAIN_EVERY
8080

8181
MIN_REWARD = lambda game: 1.7 ** (game / 1000) if game <= 4000 else 10
82-
MEMORY = 250
83-
RESET_Q_EVERY = TRAIN_EVERY * 5
82+
MEMORY = 1000
83+
RESET_Q_EVERY = TRAIN_EVERY * 250
8484

8585
NETWORK = {
8686
"input_channels": 1, "outputs": 4,
8787
"channels": [32, 64, 64],
8888
"kernels": [8, 4, 3],
8989
"padding": ["valid", "valid", "valid"],
9090
"strides": [4, 2, 1],
91-
"nodes": [512],
91+
"nodes": [1024, 512],
9292
}
9393
OPTIMIZER = {
94-
"optimizer": torch.optim.RMSprop,
94+
"optimizer": torch.optim.Adam,
9595
"lr": 0.0001,
96-
"hyperparameters": {}
96+
"hyperparameters": {"eps": 1.5e-4}
9797
}
9898

9999
METRICS = "./output/metrics.csv"

enduro/results/example.ipynb

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,11 +101,7 @@
101101
" \"strides\": [4, 2, 1],\n",
102102
" \"nodes\": [512],\n",
103103
"}\n",
104-
"optimizer = {\n",
105-
" \"optimizer\": torch.optim.RMSprop,\n",
106-
" \"lr\": 0.0001,\n",
107-
" \"hyperparameters\": {}\n",
108-
"}\n",
104+
"optimizer = {\"optimizer\": torch.optim.RMSprop, \"lr\": 0.0001}\n",
109105
"\n",
110106
"agent = Agent(\n",
111107
" network=network,\n",

help/visualisation/preprocessing.ipynb

Lines changed: 14 additions & 16 deletions
Large diffs are not rendered by default.

tetris/DQN.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -332,7 +332,7 @@ def reward(self, state, reward):
332332
state = self.preprocess(state)
333333

334334
height = 0
335-
for i, row in enumerate(state.__reversed__()):
335+
for i, row in enumerate(reversed(state)):
336336
if all(row == -1):
337337
height = state.shape[0] - i
338338
break

tetris/results/example.ipynb

Lines changed: 261 additions & 33 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)