Skip to content

Commit 1f36b9e

Browse files
bumping to RLtools v2.2
1 parent dceb64e commit 1f36b9e

File tree

3 files changed

+14
-14
lines changed

CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
cmake_minimum_required(VERSION 3.10)
22

33
project(rl-tools-example)
4-
#set(RL_TOOLS_BACKEND_ENABLE_MKL ON) # if you have MKL installed (fastest on Intel)
5-
#set(RL_TOOLS_BACKEND_ENABLE_OPENBLAS ON) # if you have OpenBLAS installed
6-
#set(RL_TOOLS_BACKEND_ENABLE_ACCELERATE ON) # if you are on macOS (fastest on Apple Silicon)
74
add_subdirectory(external/rl_tools)
85

96
set(FETCHCONTENT_UPDATES_DISCONNECTED ON)

external/rl_tools

Submodule rl_tools updated 727 files

src/main.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
#include <rl_tools/operations/cpu_mux.h>
2-
#include <rl_tools/nn/operations_cpu_mux.h>
3-
#include <rl_tools/nn_models/operations_cpu.h>
42

53
#include "../include/my_pendulum/my_pendulum.h"
64
#include "../include/my_pendulum/operations_generic.h"
75
#include "../include/my_pendulum/operations_cpu.h" // JSON conversion functions for the rl::loop::steps::save_trajectories step (stored according to the experiment tracking specification: https://docs.rl.tools/10-Experiment%20Tracking.html)
86

7+
#include <rl_tools/nn/optimizers/adam/instance/operations_generic.h>
8+
#include <rl_tools/nn/operations_cpu_mux.h>
9+
#include <rl_tools/nn_models/operations_cpu.h>
10+
911
#include <rl_tools/rl/algorithms/ppo/loop/core/config.h>
1012
#include <rl_tools/rl/algorithms/ppo/loop/core/operations_generic.h>
1113
#include <rl_tools/rl/loop/steps/extrack/operations_cpu.h>
@@ -16,12 +18,13 @@ namespace rlt = rl_tools;
1618

1719

1820
using DEVICE = rlt::devices::DEVICE_FACTORY<>;
19-
using RNG = decltype(rlt::random::default_engine(typename DEVICE::SPEC::RANDOM{}));
21+
using RNG = DEVICE::SPEC::RANDOM::ENGINE<>;
2022
using T = float;
23+
using TYPE_POLICY = rlt::numeric_types::Policy<T>;
2124
using TI = typename DEVICE::index_t;
2225
using PENDULUM_SPEC = MyPendulumSpecification<T, TI, MyPendulumParameters<T>>;
2326
using ENVIRONMENT = MyPendulum<PENDULUM_SPEC>;
24-
struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParameters<T, TI, ENVIRONMENT>{
27+
struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParameters<TYPE_POLICY, TI, ENVIRONMENT>{
2528
static constexpr TI N_ENVIRONMENTS = 8;
2629
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 128;
2730
static constexpr TI BATCH_SIZE = 128;
@@ -32,28 +35,28 @@ struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParame
3235
static constexpr auto CRITIC_ACTIVATION_FUNCTION = rlt::nn::activation_functions::ActivationFunction::FAST_TANH;
3336
static constexpr TI STEP_LIMIT = TOTAL_STEP_LIMIT/(ON_POLICY_RUNNER_STEPS_PER_ENV * N_ENVIRONMENTS) + 1;
3437
static constexpr TI EPISODE_STEP_LIMIT = ENVIRONMENT::EPISODE_STEP_LIMIT;
35-
struct OPTIMIZER_PARAMETERS: rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_TENSORFLOW<T>{
38+
struct OPTIMIZER_PARAMETERS: rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_TENSORFLOW<TYPE_POLICY>{
3639
static constexpr T ALPHA = 0.001;
3740
};
3841
static constexpr bool NORMALIZE_OBSERVATIONS = true;
39-
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE>{
42+
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<TYPE_POLICY, TI, BATCH_SIZE>{
4043
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
4144
static constexpr TI N_EPOCHS = 1;
4245
static constexpr T GAMMA = 0.9;
4346
static constexpr T INITIAL_ACTION_STD = 2.0;
4447
};
4548
};
46-
using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<T, TI, RNG, ENVIRONMENT, LOOP_CORE_PARAMETERS>;
49+
using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<TYPE_POLICY, TI, RNG, ENVIRONMENT, LOOP_CORE_PARAMETERS>;
4750
#ifndef BENCHMARK
4851
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
4952
template <typename NEXT>
50-
struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>{
53+
struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<TYPE_POLICY, TI, NEXT>{
5154
static constexpr TI EVALUATION_INTERVAL = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10;
5255
static constexpr TI NUM_EVALUATION_EPISODES = 10;
5356
static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL;
5457
};
5558
using LOOP_EVALUATION_CONFIG = rlt::rl::loop::steps::evaluation::Config<LOOP_EXTRACK_CONFIG, LOOP_EVAL_PARAMETERS<LOOP_EXTRACK_CONFIG>>; // Evaluates the policy in a fixed interval and logs the return
56-
struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters<T, TI, LOOP_EVALUATION_CONFIG>{
59+
struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters<TYPE_POLICY, TI, LOOP_EVALUATION_CONFIG>{
5760
static constexpr TI INTERVAL_TEMP = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 3;
5861
static constexpr TI INTERVAL = INTERVAL_TEMP == 0 ? 1 : INTERVAL_TEMP;
5962
static constexpr TI NUM_EPISODES = 10;
@@ -77,7 +80,7 @@ int main(){
7780
LOOP_STATE ls;
7881
#ifndef BENCHMARK
7982
// Set experiment tracking info
80-
ls.extrack_name = "example";
83+
ls.extrack_config.name = "example";
8184
#endif
8285
rlt::malloc(device, ls);
8386
rlt::init(device, ls, seed);

0 commit comments

Comments
 (0)