hallvardnmbu
diff --git a/breakout/results/example.ipynb b/breakout/results/example.ipynb
Lines changed: 42 additions & 38 deletions
diff --git a/breakout/train.py b/breakout/train.py
Lines changed: 12 additions & 12 deletions
diff --git a/enduro/results/example.ipynb b/enduro/results/example.ipynb
Lines changed: 1 addition & 5 deletions
diff --git a/help/visualisation/preprocessing.ipynb b/help/visualisation/preprocessing.ipynb
Lines changed: 14 additions & 16 deletions
diff --git a/tetris/DQN.py b/tetris/DQN.py
Lines changed: 1 addition & 1 deletion
diff --git a/tetris/results/example.ipynb b/tetris/results/example.ipynb
Lines changed: 261 additions & 33 deletions
@@ -54,9 +54,9 @@
 # OPTIMIZER : A dictionary defining the optimizer used in training.
 # METRICS : The file path where the metrics are saved.
 
-GAMES = 10000
-SKIP = 6
-CHECKPOINT = 1000
+GAMES = 25000
+SKIP = 4
+CHECKPOINT = 2500
 
 SHAPE = {
     "original": (1, 1, 210, 160),
@@ -66,34 +66,34 @@
 
 DISCOUNT = 0.95
 GAMMA = 0.99
-GRADIENTS = (-10, 10)
+GRADIENTS = (-1, 1)
 
 PUNISHMENT = -1
-INCENTIVE = 1
+INCENTIVE = 3
 
 MINIBATCH = 32
 TRAIN_EVERY = 1
 
 EXPLORATION_RATE = 1.0
-EXPLORATION_MIN = 0.005
-EXPLORATION_STEPS = 8000 // TRAIN_EVERY
+EXPLORATION_MIN = 0.08
+EXPLORATION_STEPS = 5000 // TRAIN_EVERY
 
 MIN_REWARD = lambda game: 1.7 ** (game / 1000) if game <= 4000 else 10
-MEMORY = 250
-RESET_Q_EVERY = TRAIN_EVERY * 5
+MEMORY = 1000
+RESET_Q_EVERY = TRAIN_EVERY * 250
 
 NETWORK = {
     "input_channels": 1, "outputs": 4,
     "channels": [32, 64, 64],
     "kernels": [8, 4, 3],
     "padding": ["valid", "valid", "valid"],
     "strides": [4, 2, 1],
-    "nodes": [512],
+    "nodes": [1024, 512],
 }
 OPTIMIZER = {
-    "optimizer": torch.optim.RMSprop,
+    "optimizer": torch.optim.Adam,
     "lr": 0.0001,
-    "hyperparameters": {}
+    "hyperparameters": {"eps": 1.5e-4}
 }
 
 METRICS = "./output/metrics.csv"
 
@@ -101,11 +101,7 @@
     "    \"strides\": [4, 2, 1],\n",
     "    \"nodes\": [512],\n",
     "}\n",
-    "optimizer = {\n",
-    "    \"optimizer\": torch.optim.RMSprop,\n",
-    "    \"lr\": 0.0001,\n",
-    "    \"hyperparameters\": {}\n",
-    "}\n",
+    "optimizer = {\"optimizer\": torch.optim.RMSprop, \"lr\": 0.0001}\n",
     "\n",
     "agent = Agent(\n",
     "    network=network,\n",
 
@@ -332,7 +332,7 @@ def reward(self, state, reward):
         state = self.preprocess(state)
 
         height = 0
-        for i, row in enumerate(state.__reversed__()):
+        for i, row in enumerate(reversed(state)):
             if all(row == -1):
                 height = state.shape[0] - i
                 break