@@ -22,25 +22,23 @@ using TI = typename DEVICE::index_t;
// Environment type aliases (unchanged context lines of this diff).
// PENDULUM_SPEC instantiates MyPendulumSpecification with the scalar type T, the index type TI and MyPendulumParameters<T>.
2222using PENDULUM_SPEC = MyPendulumSpecification<T, TI, MyPendulumParameters<T>>;
// ENVIRONMENT is MyPendulum instantiated with that specification; it is the type handed to the loop parameters below.
2323using ENVIRONMENT = MyPendulum<PENDULUM_SPEC>;
2424struct LOOP_CORE_PARAMETERS : rlt::rl::algorithms::ppo::loop::core::DefaultParameters<T, TI, ENVIRONMENT>{
25-
2625 static constexpr TI N_ENVIRONMENTS = 8 ;
2726 static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 128 ;
2827 static constexpr TI BATCH_SIZE = 128 ;
29- static constexpr TI TOTAL_STEP_LIMIT = 500000 ;
28+ static constexpr TI TOTAL_STEP_LIMIT = 1000000 ;
3029 static constexpr TI ACTOR_HIDDEN_DIM = 32 ;
3130 static constexpr TI CRITIC_HIDDEN_DIM = 32 ;
3231 static constexpr auto ACTOR_ACTIVATION_FUNCTION = rlt::nn::activation_functions::ActivationFunction::FAST_TANH;
3332 static constexpr auto CRITIC_ACTIVATION_FUNCTION = rlt::nn::activation_functions::ActivationFunction::FAST_TANH;
3433 static constexpr TI STEP_LIMIT = TOTAL_STEP_LIMIT/(ON_POLICY_RUNNER_STEPS_PER_ENV * N_ENVIRONMENTS) + 1 ;
3534 static constexpr TI EPISODE_STEP_LIMIT = ENVIRONMENT::EPISODE_STEP_LIMIT;
// Optimizer settings: inherits rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_TENSORFLOW<T> and overrides only ALPHA.
// NOTE(review): ALPHA is presumably Adam's step size (learning rate) -- confirm against the rlt optimizer headers.
3635 struct OPTIMIZER_PARAMETERS : rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_TENSORFLOW<T>{
37- static constexpr T ALPHA = 0.01 ; // removed side of the diff: previous value
36+ static constexpr T ALPHA = 0.001 ; // added side of the diff: new value, ten times smaller than the previous one
3837 };
39-
38+ static constexpr bool NORMALIZE_OBSERVATIONS = true ;
// PPO settings: inherits rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE> and overrides the constants below.
4039 struct PPO_PARAMETERS : rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE>{
4140 static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0 ; // NOTE(review): presumably the weight of an entropy term, so 0.0 would disable it -- confirm
4241 static constexpr TI N_EPOCHS = 1 ;
43- static constexpr bool NORMALIZE_OBSERVATIONS = true ; // removed side of the diff: the flag no longer lives in PPO_PARAMETERS; the added line directly before this struct declares it one scope up
4442 static constexpr T GAMMA = 0.9 ; // NOTE(review): presumably the discount factor -- confirm
4543 static constexpr T INITIAL_ACTION_STD = 2.0 ;
4644 };
@@ -50,7 +48,7 @@ using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<T, TI, RNG
5048using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
// Evaluation-step parameters: inherits rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>, templated on the NEXT config type.
// NOTE(review): EVALUATION_INTERVAL reads STEP_LIMIT through LOOP_CORE_CONFIG while N_EVALUATIONS reads it through NEXT;
// presumably both resolve to the same CORE_PARAMETERS -- confirm.
5149template <typename NEXT>
5250struct LOOP_EVAL_PARAMETERS : rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>{
53- static constexpr TI EVALUATION_INTERVAL = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 5 ; // removed side of the diff: previous divisor
51+ static constexpr TI EVALUATION_INTERVAL = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10 ; // added side of the diff: interval is now STEP_LIMIT / 10
5452 static constexpr TI NUM_EVALUATION_EPISODES = 10 ;
5553 static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL; // quotient of NEXT's STEP_LIMIT by the interval (truncating if TI is an integral index type -- TODO confirm)
5654};
@@ -75,7 +73,7 @@ using LOOP_STATE = typename LOOP_CONFIG::template State<LOOP_CONFIG>;
7573
7674int main (){
7775 DEVICE device;
78- TI seed = 2 ;
76+ TI seed = 0 ;
7977 LOOP_STATE ls;
8078#ifndef BENCHMARK
8179 // Set experiment tracking info
0 commit comments