diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py index 5d5f195..1c00343 100644 --- a/tasks/rl_hopper.py +++ b/tasks/rl_hopper.py @@ -162,7 +162,7 @@ def setup(): def train(context, parameter_path): course = context.course - num_courses = 2 + num_courses = 3 if course >= num_courses: logging.info("Experiment already completed") @@ -285,7 +285,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): total_steps = 0 num_trials = 2000 print_steps = max(1, num_trials // 100) - epsilon = 0.8 - 0.7 * (course + 1) / num_courses + epsilon = 0.8 - 0.6 * (course + 1) / num_courses course_statistics = {}