carletonai · machine-moon · Jul 21, 2025 · Jul 21, 2025 · Jul 21, 2025 · Jul 21, 2025
diff --git a/Release_Notes.txt b/Release_Notes.txt
@@ -2,6 +2,14 @@ CuMind Release Notes
 -------------------------------------------------------------------
 Document all technical changes introduced in this release as concise bullet points below.
 
+v0.1.9 (2025-07-24)
+----------------------
+Tarek Ibrahim (68)
+- Introduced 'multi_device' option in configuration for better device management.
+- Refactored training loop in Trainer class for improved logging and clarity.
+- Added vectorized n-step return functions in jax_utils for efficiency.
+- Implemented randint method in KeyManager for random integer generation.
+
 v0.1.8 (2025-07-18)
 ----------------------
 Tarek Ibrahim (68)

diff --git a/configuration.json b/configuration.json
@@ -1,42 +1,45 @@
 {
   "CuMind": {
     "networks": {
-      "hidden_dim": 128
+      "hidden_state_dim": 128
     },
     "representation": {
       "type": "cumind.core.resnet.ResNet",
-      "num_blocks": 2,
+      "num_hidden_layers": 2,
       "conv_channels": 32,
       "seed": 42
     },
     "dynamics": {
       "type": "cumind.core.mlp.MLPWithEmbedding",
-      "num_blocks": 2,
+      "hidden_dim": 128,
+      "num_hidden_layers": 2,
       "seed": 42
     },
     "prediction": {
       "type": "cumind.core.mlp.MLPDual",
+      "hidden_dim": 128,
+      "num_hidden_layers": 2,
       "seed": 42
     },
     "memory": {
-      "type": "cumind.data.memory.MemoryBuffer",
-      "capacity": 2000,
+      "type": "cumind.data.memory.PrioritizedMemoryBuffer",
+      "capacity": 1000,
       "min_size": 100,
       "min_pct": 0.1,
-      "per_alpha": 0.6,
-      "per_epsilon": 1e-06,
-      "per_beta": 0.4
+      "alpha": 0.6,
+      "epsilon": 1e-06,
+      "beta": 0.4
     },
     "training": {
       "optimizer": "optax.adamw",
       "batch_size": 64,
-      "learning_rate": 0.01,
+      "learning_rate": 0.001,
       "weight_decay": 0.0001,
       "target_update_frequency": 250,
       "checkpoint_interval": 50,
-      "num_episodes": 1220,
+      "num_episodes": 2000,
       "train_frequency": 2,
-      "checkpoint_root_dir": "checkpoints"
+      "checkpoint_dir": "checkpoints"
     },
     "mcts": {
       "num_simulations": 25,
@@ -47,7 +50,8 @@
     "env": {
       "name": "CartPole-v1",
       "action_space_size": 2,
-      "observation_shape": [4]
+      "observation_shape": [4],
+      "max_episode_steps": 500
     },
     "selfplay": {
       "num_unroll_steps": 5,
@@ -62,11 +66,13 @@
     "logging": {
       "dir": "logs",
       "level": "INFO",
-      "console": true,
-      "timestamps": false,
-      "tqdm": false
+      "console": false,
+      "timestamps": true,
+      "tqdm": true
     },
     "device": "cpu",
-    "seed": 42
+    "seed": 42,
+    "validate": true,
+    "multi_device": false
   }
 }
diff --git a/cumind.svg b/cumind.svg
diff --git a/cumind___main__.svg b/cumind___main__.svg
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@
 # =========================
 [project]
 name = "cumind"
-version = "0.1.8"
+version = "0.1.9"
 description = "CuMind is a JAX-based RL framework inspired by Google DeepMind. Achieves superhuman performance in complex domains without pretraining or prior knowledge of their rules."
 readme = "README.md"
 requires-python = ">=3.12"

diff --git a/src/cartpole.py b/src/cartpole.py
@@ -7,10 +7,23 @@
 
 def main() -> None:
     """Main function for running the CartPole example."""
-    cfg.load("configuration.json")
-    ckpt = train()
+    timestamp, checkpoint_dir = cfg.load("configuration.json")
+
+    # Print directory information
+    print(f"Logging directory: {cfg.logging.dir}/{timestamp}/training.log")
+    print(f"Checkpoint directory: {checkpoint_dir}/")
+
+    train()
     log.info(f"Training completed in {log.elapsed()}.")
-    inference(ckpt)
+
+    # Print latest checkpoint path
+    latest_ckpt = f"{checkpoint_dir}/episode_{cfg.training.num_episodes:05d}.pkl"
+    print(f"Latest checkpoint: {latest_ckpt}")
+
+    # latest_ckpt = "checkpoints/CartPole-v1/20250720_230602/episode_00700.pkl"
+
+    log.open()
+    inference(latest_ckpt, 500)
 
 
 if __name__ == "__main__":

diff --git a/src/cumind/__init__.py b/src/cumind/__init__.py
@@ -1,6 +1,6 @@
 """CuMind: A modular reinforcement learning framework."""
 
-__version__ = "0.1.8"
+__version__ = "0.1.9"
 
 # Most commonly used components
 from .agent import Agent, inference, train