Skip to content

Commit dc81159

Browse files
bump & good params
1 parent 85cd1f0 commit dc81159

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

src/main.cpp

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,25 +22,23 @@ using TI = typename DEVICE::index_t;
2222
using PENDULUM_SPEC = MyPendulumSpecification<T, TI, MyPendulumParameters<T>>;
2323
using ENVIRONMENT = MyPendulum<PENDULUM_SPEC>;
2424
struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParameters<T, TI, ENVIRONMENT>{
25-
2625
static constexpr TI N_ENVIRONMENTS = 8;
2726
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 128;
2827
static constexpr TI BATCH_SIZE = 128;
29-
static constexpr TI TOTAL_STEP_LIMIT = 500000;
28+
static constexpr TI TOTAL_STEP_LIMIT = 1000000;
3029
static constexpr TI ACTOR_HIDDEN_DIM = 32;
3130
static constexpr TI CRITIC_HIDDEN_DIM = 32;
3231
static constexpr auto ACTOR_ACTIVATION_FUNCTION = rlt::nn::activation_functions::ActivationFunction::FAST_TANH;
3332
static constexpr auto CRITIC_ACTIVATION_FUNCTION = rlt::nn::activation_functions::ActivationFunction::FAST_TANH;
3433
static constexpr TI STEP_LIMIT = TOTAL_STEP_LIMIT/(ON_POLICY_RUNNER_STEPS_PER_ENV * N_ENVIRONMENTS) + 1;
3534
static constexpr TI EPISODE_STEP_LIMIT = ENVIRONMENT::EPISODE_STEP_LIMIT;
3635
struct OPTIMIZER_PARAMETERS: rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_TENSORFLOW<T>{
37-
static constexpr T ALPHA = 0.01;
36+
static constexpr T ALPHA = 0.001;
3837
};
39-
38+
static constexpr bool NORMALIZE_OBSERVATIONS = true;
4039
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE>{
4140
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
4241
static constexpr TI N_EPOCHS = 1;
43-
static constexpr bool NORMALIZE_OBSERVATIONS = true;
4442
static constexpr T GAMMA = 0.9;
4543
static constexpr T INITIAL_ACTION_STD = 2.0;
4644
};
@@ -50,7 +48,7 @@ using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<T, TI, RNG
5048
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
5149
template <typename NEXT>
5250
struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>{
53-
static constexpr TI EVALUATION_INTERVAL = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 5;
51+
static constexpr TI EVALUATION_INTERVAL = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10;
5452
static constexpr TI NUM_EVALUATION_EPISODES = 10;
5553
static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL;
5654
};
@@ -75,7 +73,7 @@ using LOOP_STATE = typename LOOP_CONFIG::template State<LOOP_CONFIG>;
7573

7674
int main(){
7775
DEVICE device;
78-
TI seed = 2;
76+
TI seed = 0;
7977
LOOP_STATE ls;
8078
#ifndef BENCHMARK
8179
// Set experiment tracking info

0 commit comments

Comments (0)