Skip to content

Commit 04baa57

Browse files
adding benchmark target; updating ui; bumping rlt
1 parent 9a19e5b commit 04baa57

File tree

4 files changed

+40
-23
lines changed

4 files changed

+40
-23
lines changed

CMakeLists.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
#set(RL_TOOLS_BACKEND_ENABLE_ACCELERATE ON) # if you are on macOS (fastest on Apple Silicon)
44
add_subdirectory(external/rl_tools)
55

6-
add_executable(my_pendulum
7-
src/main.cpp
8-
)
9-
#target_compile_definitions(my_pendulum PRIVATE BENCHMARK)
6+
add_executable(my_pendulum src/main.cpp)
107
target_link_libraries(my_pendulum PRIVATE RLtools::RLtools)
118

9+
# The following target builds the same source with BENCHMARK defined, which disables evaluations and checkpointing during training so that the pure training time can be assessed
10+
add_executable(my_pendulum_benchmark src/main.cpp)
11+
target_compile_definitions(my_pendulum_benchmark PRIVATE BENCHMARK)
12+
target_link_libraries(my_pendulum_benchmark PRIVATE RLtools::RLtools)
13+
1214

1315

1416
if(NOT MSVC AND CMAKE_BUILD_TYPE STREQUAL "Release")

external/rl_tools

Submodule rl_tools updated 286 files

include/my_pendulum/operations_cpu.h

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,32 @@
1+
#include <string>
2+
13
namespace rl_tools{
2-
template<typename DEVICE, typename SPEC>
3-
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters){
4+
template <typename DEVICE, typename SPEC>
5+
std::string json(DEVICE&, MyPendulum<SPEC>& env, typename MyPendulum<SPEC>::Parameters& parameters){
46
return "{}";
57
}
6-
7-
template<typename DEVICE, typename SPEC>
8-
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters, const typename MyPendulum<SPEC>::State& state){
8+
template <typename DEVICE, typename SPEC>
9+
std::string json(DEVICE&, MyPendulum<SPEC>& env, typename MyPendulum<SPEC>::Parameters& parameters, typename MyPendulum<SPEC>::State& state){
910
std::string json = "{";
1011
json += "\"theta\":" + std::to_string(state.theta) + ",";
1112
json += "\"theta_dot\":" + std::to_string(state.theta_dot);
1213
json += "}";
1314
return json;
1415
}
16+
1517
template <typename DEVICE, typename SPEC>
1618
std::string get_ui(DEVICE& device, MyPendulum<SPEC>& env){
17-
// just the body of `function render(ctx, state, action) {` (so that it can be easily processed by `new Function("ctx", "state", "action", body)`)
19+
// Implement the async functions `render(ui_state, parameters, state, action)` and `init(canvas, options)` and `export` them so that they are available as ES6 imports
1820
// Please have a look at https://studio.rl.tools which helps you create render functions interactively
1921
std::string ui = R"RL_TOOLS_LITERAL(
22+
export async function init(canvas, options){
23+
// Simply saving the context for 2D environments
24+
return {
25+
ctx: canvas.getContext('2d')
26+
}
27+
}
28+
export async function render(ui_state, parameters, state, action) {
29+
const ctx = ui_state.ctx
2030
ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);
2131
2232
const centerX = ctx.canvas.width / 2;
@@ -88,8 +98,8 @@ namespace rl_tools{
8898
ctx.lineTo(arrowX, arrowY);
8999
ctx.fillStyle = 'black';
90100
ctx.fill();
101+
}
91102
)RL_TOOLS_LITERAL";
92103
return ui;
93104
}
94-
95-
}
105+
}

src/main.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,32 @@ using TI = typename DEVICE::index_t;
2222
using PENDULUM_SPEC = MyPendulumSpecification<T, TI, MyPendulumParameters<T>>;
2323
using ENVIRONMENT = MyPendulum<PENDULUM_SPEC>;
2424
struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParameters<T, TI, ENVIRONMENT>{
25-
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI>{
26-
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
27-
static constexpr TI N_EPOCHS = 2;
28-
};
29-
25+
static constexpr TI BATCH_SIZE = 256;
26+
static constexpr TI ACTOR_HIDDEN_DIM = 64;
27+
static constexpr TI CRITIC_HIDDEN_DIM = 64;
28+
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 1024;
3029
static constexpr TI N_ENVIRONMENTS = 4;
31-
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 256;
32-
static constexpr TI BATCH_SIZE = 64;
3330
static constexpr TI TOTAL_STEP_LIMIT = 300000;
3431
static constexpr TI STEP_LIMIT = TOTAL_STEP_LIMIT/(ON_POLICY_RUNNER_STEPS_PER_ENV * N_ENVIRONMENTS) + 1;
3532
static constexpr TI EPISODE_STEP_LIMIT = 200;
33+
using OPTIMIZER_PARAMETERS = rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_PYTORCH<T>;
34+
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE>{
35+
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
36+
static constexpr TI N_EPOCHS = 2;
37+
static constexpr T GAMMA = 0.9;
38+
static constexpr T INITIAL_ACTION_STD = 2.0;
39+
static constexpr bool NORMALIZE_OBSERVATIONS = true;
40+
};
3641
};
3742
using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<T, TI, RNG, ENVIRONMENT, LOOP_CORE_PARAMETERS>;
43+
#ifndef BENCHMARK
44+
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
3845
template <typename NEXT>
3946
struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>{
4047
static constexpr TI EVALUATION_INTERVAL = 4;
4148
static constexpr TI NUM_EVALUATION_EPISODES = 10;
4249
static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL;
4350
};
44-
#ifndef BENCHMARK
45-
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
4651
using LOOP_EVALUATION_CONFIG = rlt::rl::loop::steps::evaluation::Config<LOOP_EXTRACK_CONFIG, LOOP_EVAL_PARAMETERS<LOOP_EXTRACK_CONFIG>>; // Evaluates the policy in a fixed interval and logs the return
4752
struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters<T, TI, LOOP_EVALUATION_CONFIG>{
4853
static constexpr TI INTERVAL_TEMP = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10;
@@ -84,5 +89,5 @@ int main(){
8489
}
8590
auto end_time = std::chrono::high_resolution_clock::now();
8691
std::chrono::duration<double> diff = end_time-start_time;
87-
std::cout << "Training time: " << diff.count() << std::endl;
92+
std::cout << "Training time: " << diff.count() << " s" << std::endl;
8893
}

0 commit comments

Comments
 (0)