Commit

revert debug comments
runjerry committed Feb 3, 2025
1 parent 82cce62 commit e90394a
Showing 1 changed file with 142 additions and 141 deletions.
283 changes: 142 additions & 141 deletions alf/algorithms/sac_algorithm_test.py
@@ -194,147 +194,148 @@ def test_sac_algorithm(self, use_naive_parallel_network, reward_dim):
1.0, float(eval_time_step.reward.mean()), delta=0.3)


# class SACAlgorithmTestDiscrete(parameterized.TestCase, alf.test.TestCase):
# @parameterized.parameters((True, 1), (False, 3))
# def test_sac_algorithm_discrete(self, use_naive_parallel_network,
# reward_dim):
# num_env = 1
# config = TrainerConfig(
# root_dir="dummy",
# unroll_length=1,
# mini_batch_length=2,
# mini_batch_size=64,
# initial_collect_steps=500,
# whole_replay_buffer_training=False,
# clear_replay_buffer=False,
# )
# env_class = PolicyUnittestEnv
#
# steps_per_episode = 13
# env = env_class(
# num_env,
# steps_per_episode,
# action_type=ActionType.Discrete,
# reward_dim=reward_dim)
#
# eval_env = env_class(
# 100,
# steps_per_episode,
# action_type=ActionType.Discrete,
# reward_dim=reward_dim)
#
# obs_spec = env._observation_spec
# action_spec = env._action_spec
# reward_spec = env._reward_spec
#
# fc_layer_params = (10, 10)
#
# q_network = partial(
# QNetwork,
# fc_layer_params=fc_layer_params,
# use_naive_parallel_network=use_naive_parallel_network)
#
# alg2 = SacAlgorithm(
# observation_spec=obs_spec,
# action_spec=action_spec,
# reward_spec=reward_spec,
# q_network_cls=q_network,
# use_entropy_reward=(reward_dim == 1),
# epsilon_greedy=0.1,
# env=env,
# config=config,
# critic_optimizer=alf.optimizers.Adam(lr=1e-3),
# alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
# debug_summaries=False,
# name="MySAC")
#
# eval_env.reset()
# for i in range(700):
# alg2.train_iter()
# if i < config.initial_collect_steps:
# continue
# eval_env.reset()
# eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
# logging.log_every_n_seconds(
# logging.INFO,
# "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
# n_seconds=1)
#
# self.assertAlmostEqual(
# 1.0, float(eval_time_step.reward.mean()), delta=0.2)
#
#
# class SACAlgorithmTestMixed(parameterized.TestCase, alf.test.TestCase):
# @parameterized.parameters((True, ), (False, ))
# def test_sac_algorithm_mixed(self, use_naive_parallel_network):
# num_env = 1
# config = TrainerConfig(
# root_dir="dummy",
# unroll_length=1,
# mini_batch_length=2,
# mini_batch_size=64,
# initial_collect_steps=500,
# whole_replay_buffer_training=False,
# clear_replay_buffer=False,
# )
# env_class = MixedPolicyUnittestEnv
#
# steps_per_episode = 13
# env = env_class(num_env, steps_per_episode)
#
# eval_env = env_class(100, steps_per_episode)
#
# obs_spec = env._observation_spec
# action_spec = env._action_spec
#
# fc_layer_params = (10, 10, 10)
#
# continuous_projection_net_ctor = partial(
# alf.networks.NormalProjectionNetwork,
# state_dependent_std=True,
# scale_distribution=True,
# std_transform=clipped_exp)
#
# actor_network = partial(
# ActorDistributionNetwork,
# fc_layer_params=fc_layer_params,
# continuous_projection_net_ctor=continuous_projection_net_ctor)
#
# q_network = partial(
# QNetwork,
# preprocessing_combiner=NestConcat(),
# fc_layer_params=fc_layer_params,
# use_naive_parallel_network=use_naive_parallel_network)
#
# alg2 = SacAlgorithm(
# observation_spec=obs_spec,
# action_spec=action_spec,
# actor_network_cls=actor_network,
# q_network_cls=q_network,
# epsilon_greedy=0.1,
# env=env,
# config=config,
# actor_optimizer=alf.optimizers.Adam(lr=1e-2),
# critic_optimizer=alf.optimizers.Adam(lr=1e-2),
# alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
# debug_summaries=False,
# name="MySAC")
#
# eval_env.reset()
# for i in range(700):
# alg2.train_iter()
# if i < config.initial_collect_steps:
# continue
#
# eval_env.reset()
# eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
# logging.log_every_n_seconds(
# logging.INFO,
# "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
# n_seconds=1)
#
# self.assertAlmostEqual(
# 1.0, float(eval_time_step.reward.mean()), delta=0.2)
class SACAlgorithmTestDiscrete(parameterized.TestCase, alf.test.TestCase):
@parameterized.parameters((True, 1), (False, 3))
def test_sac_algorithm_discrete(self, use_naive_parallel_network,
reward_dim):
num_env = 1
config = TrainerConfig(
root_dir="dummy",
unroll_length=1,
mini_batch_length=2,
mini_batch_size=64,
initial_collect_steps=500,
whole_replay_buffer_training=False,
clear_replay_buffer=False,
)
env_class = PolicyUnittestEnv

steps_per_episode = 13
env = env_class(
num_env,
steps_per_episode,
action_type=ActionType.Discrete,
reward_dim=reward_dim)

eval_env = env_class(
100,
steps_per_episode,
action_type=ActionType.Discrete,
reward_dim=reward_dim)

obs_spec = env._observation_spec
action_spec = env._action_spec
reward_spec = env._reward_spec

fc_layer_params = (10, 10)

q_network = partial(
QNetwork,
fc_layer_params=fc_layer_params,
use_naive_parallel_network=use_naive_parallel_network)

alg2 = SacAlgorithm(
observation_spec=obs_spec,
action_spec=action_spec,
reward_spec=reward_spec,
q_network_cls=q_network,
use_entropy_reward=(reward_dim == 1),
epsilon_greedy=0.1,
env=env,
config=config,
critic_optimizer=alf.optimizers.Adam(lr=1e-3),
alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
debug_summaries=False,
name="MySAC")

eval_env.reset()
for i in range(700):
alg2.train_iter()
if i < config.initial_collect_steps:
continue
eval_env.reset()
eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
logging.log_every_n_seconds(
logging.INFO,
"%d reward=%f" % (i, float(eval_time_step.reward.mean())),
n_seconds=1)

self.assertAlmostEqual(
1.0, float(eval_time_step.reward.mean()), delta=0.2)


class SACAlgorithmTestMixed(parameterized.TestCase, alf.test.TestCase):
@parameterized.parameters((True, ), (False, ))
def test_sac_algorithm_mixed(self, use_naive_parallel_network):
num_env = 1
config = TrainerConfig(
root_dir="dummy",
unroll_length=1,
mini_batch_length=2,
mini_batch_size=64,
initial_collect_steps=500,
whole_replay_buffer_training=False,
clear_replay_buffer=False,
)
env_class = MixedPolicyUnittestEnv

steps_per_episode = 13
env = env_class(num_env, steps_per_episode)

eval_env = env_class(100, steps_per_episode)

obs_spec = env._observation_spec
action_spec = env._action_spec

fc_layer_params = (10, 10, 10)

continuous_projection_net_ctor = partial(
alf.networks.NormalProjectionNetwork,
state_dependent_std=True,
scale_distribution=True,
std_transform=clipped_exp)

actor_network = partial(
ActorDistributionNetwork,
fc_layer_params=fc_layer_params,
continuous_projection_net_ctor=continuous_projection_net_ctor)

q_network = partial(
QNetwork,
preprocessing_combiner=NestConcat(),
fc_layer_params=fc_layer_params,
use_naive_parallel_network=use_naive_parallel_network)

alg2 = SacAlgorithm(
observation_spec=obs_spec,
action_spec=action_spec,
actor_network_cls=actor_network,
q_network_cls=q_network,
epsilon_greedy=0.1,
env=env,
config=config,
actor_optimizer=alf.optimizers.Adam(lr=1e-2),
critic_optimizer=alf.optimizers.Adam(lr=1e-2),
alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
debug_summaries=False,
name="MySAC")

eval_env.reset()
for i in range(700):
alg2.train_iter()
if i < config.initial_collect_steps:
continue

eval_env.reset()
eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
logging.log_every_n_seconds(
logging.INFO,
"%d reward=%f" % (i, float(eval_time_step.reward.mean())),
n_seconds=1)

self.assertAlmostEqual(
1.0, float(eval_time_step.reward.mean()), delta=0.2)


if __name__ == '__main__':
alf.test.main()
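
For context, a minimal sketch of how one might run the restored tests locally. This is not part of the commit; invoking the module with `python -m` and having ALF plus its dependencies installed (with the repository root on PYTHONPATH) are assumptions about the development setup.

# Hypothetical local run of alf/algorithms/sac_algorithm_test.py.
# The __main__ guard above dispatches to alf.test.main(), so executing
# the module runs all of the test classes shown in this diff.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "alf.algorithms.sac_algorithm_test"],
    check=True)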
