diff --git a/gymnasium/atari/.gitkeep b/gymnasium/atari/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/gymnasium/box2d/.gitkeep b/gymnasium/box2d/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/gymnasium/classic_control/Acrobot-v1/configuration.json b/gymnasium/classic_control/Acrobot-v1/configuration.json new file mode 100644 index 0000000..ae15860 --- /dev/null +++ b/gymnasium/classic_control/Acrobot-v1/configuration.json @@ -0,0 +1,54 @@ +{ + "Network architecture": { + "hidden_dim": 128, + "num_blocks": 2, + "conv_channels": 32 + }, + "Training": { + "batch_size": 64, + "learning_rate": 0.01, + "weight_decay": 0.0001, + "target_update_frequency": 250, + "checkpoint_interval": 50, + "num_episodes": 1220, + "train_frequency": 2, + "checkpoint_root_dir": "checkpoints" + }, + "MCTS": { + "num_simulations": 25, + "c_puct": 1.25, + "dirichlet_alpha": 0.25, + "exploration_fraction": 0.25 + }, + "Environment": { + "env_name": "Acrobot-v1", + "action_space_size": 3, + "observation_shape": [ + 6 + ] + }, + "Self-Play": { + "num_unroll_steps": 5, + "td_steps": 10, + "discount": 0.997 + }, + "Memory": { + "memory_capacity": 2000, + "min_memory_size": 100, + "min_memory_pct": 0.1, + "per_alpha": 0.6, + "per_epsilon": 1e-06, + "per_beta": 0.4 + }, + "Data Types": { + "model_dtype": "float32", + "action_dtype": "int32", + "target_dtype": "float32" + }, + "Device": { + "device_type": "cpu" + }, + "Other": { + "seed": 42 + } +} \ No newline at end of file diff --git a/gymnasium/classic_control/CartPole-v1/configuration.json b/gymnasium/classic_control/CartPole-v1/configuration.json new file mode 100644 index 0000000..79383e2 --- /dev/null +++ b/gymnasium/classic_control/CartPole-v1/configuration.json @@ -0,0 +1,54 @@ +{ + "Network architecture": { + "hidden_dim": 128, + "num_blocks": 2, + "conv_channels": 32 + }, + "Training": { + "batch_size": 64, + "learning_rate": 0.01, + "weight_decay": 0.0001, + "target_update_frequency": 250, + "checkpoint_interval": 50, + "num_episodes": 1220, + "train_frequency": 2, + "checkpoint_root_dir": "checkpoints" + }, + "MCTS": { + "num_simulations": 25, + "c_puct": 1.25, + "dirichlet_alpha": 0.25, + "exploration_fraction": 0.25 + }, + "Environment": { + "env_name": "CartPole-v1", + "action_space_size": 2, + "observation_shape": [ + 4 + ] + }, + "Self-Play": { + "num_unroll_steps": 5, + "td_steps": 10, + "discount": 0.997 + }, + "Memory": { + "memory_capacity": 2000, + "min_memory_size": 100, + "min_memory_pct": 0.1, + "per_alpha": 0.6, + "per_epsilon": 1e-06, + "per_beta": 0.4 + }, + "Data Types": { + "model_dtype": "float32", + "action_dtype": "int32", + "target_dtype": "float32" + }, + "Device": { + "device_type": "cpu" + }, + "Other": { + "seed": 42 + } +} \ No newline at end of file diff --git a/gymnasium/classic_control/MountainCar-v0/configuration.json b/gymnasium/classic_control/MountainCar-v0/configuration.json new file mode 100644 index 0000000..cd94895 --- /dev/null +++ b/gymnasium/classic_control/MountainCar-v0/configuration.json @@ -0,0 +1,54 @@ +{ + "Network architecture": { + "hidden_dim": 128, + "num_blocks": 2, + "conv_channels": 32 + }, + "Training": { + "batch_size": 64, + "learning_rate": 0.01, + "weight_decay": 0.0001, + "target_update_frequency": 250, + "checkpoint_interval": 50, + "num_episodes": 1220, + "train_frequency": 2, + "checkpoint_root_dir": "checkpoints" + }, + "MCTS": { + "num_simulations": 25, + "c_puct": 1.25, + "dirichlet_alpha": 0.25, + "exploration_fraction": 0.25 + }, + "Environment": { + "env_name": "MountainCar-v0", + "action_space_size": 3, + "observation_shape": [ + 2 + ] + }, + "Self-Play": { + "num_unroll_steps": 5, + "td_steps": 10, + "discount": 0.997 + }, + "Memory": { + "memory_capacity": 2000, + "min_memory_size": 100, + "min_memory_pct": 0.1, + "per_alpha": 0.6, + "per_epsilon": 1e-06, + "per_beta": 0.4 + }, + "Data Types": { + "model_dtype": "float32", + "action_dtype": "int32", + "target_dtype": "float32" + }, + "Device": { + "device_type": "cpu" + }, + "Other": { + "seed": 42 + } +} \ No newline at end of file diff --git a/gymnasium/classic_control/MountainCarContinuous-v0/configuration.json b/gymnasium/classic_control/MountainCarContinuous-v0/configuration.json new file mode 100644 index 0000000..8955252 --- /dev/null +++ b/gymnasium/classic_control/MountainCarContinuous-v0/configuration.json @@ -0,0 +1,56 @@ +{ + "Network architecture": { + "hidden_dim": 128, + "num_blocks": 2, + "conv_channels": 32 + }, + "Training": { + "batch_size": 64, + "learning_rate": 0.01, + "weight_decay": 0.0001, + "target_update_frequency": 250, + "checkpoint_interval": 50, + "num_episodes": 1220, + "train_frequency": 2, + "checkpoint_root_dir": "checkpoints" + }, + "MCTS": { + "num_simulations": 25, + "c_puct": 1.25, + "dirichlet_alpha": 0.25, + "exploration_fraction": 0.25 + }, + "Environment": { + "env_name": "MountainCarContinuous-v0", + "action_space_size": [ + 1 + ], + "observation_shape": [ + 2 + ] + }, + "Self-Play": { + "num_unroll_steps": 5, + "td_steps": 10, + "discount": 0.997 + }, + "Memory": { + "memory_capacity": 2000, + "min_memory_size": 100, + "min_memory_pct": 0.1, + "per_alpha": 0.6, + "per_epsilon": 1e-06, + "per_beta": 0.4 + }, + "Data Types": { + "model_dtype": "float32", + "action_dtype": "int32", + "target_dtype": "float32" + }, + "Device": { + "device_type": "cpu" + }, + "Other": { + "seed": 42 + } +} \ No newline at end of file diff --git a/gymnasium/classic_control/Pendulum-v1/configuration.json b/gymnasium/classic_control/Pendulum-v1/configuration.json new file mode 100644 index 0000000..104bee8 --- /dev/null +++ b/gymnasium/classic_control/Pendulum-v1/configuration.json @@ -0,0 +1,56 @@ +{ + "Network architecture": { + "hidden_dim": 128, + "num_blocks": 2, + "conv_channels": 32 + }, + "Training": { + "batch_size": 64, + "learning_rate": 0.01, + "weight_decay": 0.0001, + "target_update_frequency": 250, + "checkpoint_interval": 50, + "num_episodes": 1220, + "train_frequency": 2, + "checkpoint_root_dir": "checkpoints" + }, + "MCTS": { + "num_simulations": 25, + "c_puct": 1.25, + "dirichlet_alpha": 0.25, + "exploration_fraction": 0.25 + }, + "Environment": { + "env_name": "Pendulum-v1", + "action_space_size": [ + 1 + ], + "observation_shape": [ + 3 + ] + }, + "Self-Play": { + "num_unroll_steps": 5, + "td_steps": 10, + "discount": 0.997 + }, + "Memory": { + "memory_capacity": 2000, + "min_memory_size": 100, + "min_memory_pct": 0.1, + "per_alpha": 0.6, + "per_epsilon": 1e-06, + "per_beta": 0.4 + }, + "Data Types": { + "model_dtype": "float32", + "action_dtype": "int32", + "target_dtype": "float32" + }, + "Device": { + "device_type": "cpu" + }, + "Other": { + "seed": 42 + } +} \ No newline at end of file