diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py index fbdc4c9..4a06c42 100644 --- a/tasks/rl_hopper.py +++ b/tasks/rl_hopper.py @@ -258,7 +258,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): actions = [] rewards = [] - for j in range(1000): + for j in range(200): if random.random() <= epsilon or course == 0: selected_action = env.action_space.sample() # quantize to -1 0 1