From 09302491e781886739dede404a74964ef20dd4e2 Mon Sep 17 00:00:00 2001 From: Patrick Virie Date: Wed, 25 Dec 2024 20:21:47 +0700 Subject: [PATCH] make epsilon more intuitive --- tasks/rl_hopper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py index 75ba845..69d85cb 100644 --- a/tasks/rl_hopper.py +++ b/tasks/rl_hopper.py @@ -239,7 +239,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): total_steps = 0 num_trials = 2000 print_steps = max(1, num_trials // 100) - epsilon = 0.2 + 0.7 * (course + 1) / num_courses + epsilon = 1 - 0.5 * (course + 1) / num_courses next_best_targets = np.zeros((len(goals), len(goals[0][0])), dtype=np.float32) next_best_target_diffs = np.ones((len(goals), 1), dtype=np.float32) * 1e4 @@ -258,7 +258,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): actions = [] rewards = [] for _ in range(200): - if random.random() >= epsilon or course == 0: + if random.random() <= epsilon or course == 0: selected_action = env.action_space.sample() else: a = model.react(alg.State(observation.data), stable_state)