From 4ad053af306c31fb8d4c30009f67f5bdd89a7e85 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Thu, 24 Nov 2022 02:56:05 -0800 Subject: [PATCH 01/44] goal extension for brax walkers with language option --- carl/envs/brax/__init__.py | 4 + .../envs/brax/brax_target_language_wrapper.py | 33 ++++ carl/envs/brax/brax_walker_goal_wrapper.py | 89 ++++++++++ .../brax/walker_language_context_sampling.py | 42 +++++ carl/envs/carl_env.py | 8 +- test/test_language_goals.py | 164 ++++++++++++++++++ 6 files changed, 337 insertions(+), 3 deletions(-) create mode 100644 carl/envs/brax/brax_target_language_wrapper.py create mode 100644 carl/envs/brax/brax_walker_goal_wrapper.py create mode 100644 carl/envs/brax/walker_language_context_sampling.py create mode 100644 test/test_language_goals.py diff --git a/carl/envs/brax/__init__.py b/carl/envs/brax/__init__.py index eee221fb..ce7a6678 100644 --- a/carl/envs/brax/__init__.py +++ b/carl/envs/brax/__init__.py @@ -18,3 +18,7 @@ from carl.envs.brax.carl_ur5e import CONTEXT_BOUNDS as CARLUr5e_bounds from carl.envs.brax.carl_ur5e import DEFAULT_CONTEXT as CARLUr5e_defaults from carl.envs.brax.carl_ur5e import CARLUr5e + +from carl.envs.brax.brax_target_language_wrapper import BraxLanguageWrapper +from carl.envs.brax.brax_walker_goal_wrapper import BraxWalkerGoalWrapper +from carl.envs.brax.walker_language_context_sampling import sample_walker_language_goals diff --git a/carl/envs/brax/brax_target_language_wrapper.py b/carl/envs/brax/brax_target_language_wrapper.py new file mode 100644 index 00000000..aff7c8e2 --- /dev/null +++ b/carl/envs/brax/brax_target_language_wrapper.py @@ -0,0 +1,33 @@ +import gym + + +class BraxLanguageWrapper(gym.Wrapper): + """Translates the context features target distance and target radius into language""" + + def __init__(self, env) -> None: + super().__init__(env) + + def reset(self, return_info=False): + state, info = self.env.reset(info=True) + goal_str = self.get_goal_desc(info["context"]) + extended_state = {"env_state": state, "goal": goal_str} + if return_info: + return extended_state, info + else: + return extended_state + + def step(self, action): + state, reward, done, info = self.env.step(action) + goal_str = self.get_goal_desc(info["context"]) + extended_state = {"env_state": state, "goal": goal_str} + return extended_state, reward, done, info + + def get_goal_desc(self, context): + if "target_radius" in context.keys(): + target_distance = context["target_distance"] + target_radius = context["target_radius"] + return f"The distance to the goal is {target_distance} steps. Move within {target_radius} steps of the goal." + else: + target_distance = context["target_distance"] + target_direction = context["target_direction"] + return f"Move {target_distance} steps {target_direction}." diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py new file mode 100644 index 00000000..e37ad555 --- /dev/null +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -0,0 +1,89 @@ +import gym +import numpy as np + +STATE_INDICES = { + "CARLAnt": [13, 14], + "CARLHumanoid": [22, 23], + "CARLHalfcheetah": [14, 15], +} + + +class BraxWalkerGoalWrapper(gym.Wrapper): + """Adds a positional goal to brax walker envs""" + + def __init__(self, env) -> None: + super().__init__(env) + if ( + self.env.__class__.__name__ == "CARLHumanoid" + or self.env.__class__.__name__ == "CARLHalfcheetah" + ): + self.env._forward_reward_weight = 0 + self.position = None + self.goal_position = None + self.direction_values = { + 3: [0, -1], + 1: [0, 1], + 2: [1, 0], + 4: [-1, 0], + 34: [-np.sqrt(0.5), -np.sqrt(0.5)], + 14: [-np.sqrt(0.5), np.sqrt(0.5)], + 32: [np.sqrt(0.5), -np.sqrt(0.5)], + 12: [np.sqrt(0.5), np.sqrt(0.5)], + 334: [ + -np.cos(22.5 * np.pi / 180), + -np.sin(22.5 * np.pi / 180), + ], + 434: [ + -np.sin(22.5 * np.pi / 180), + -np.cos(22.5 * np.pi / 180), + ], + 114: [ + -np.cos(22.5 * np.pi / 180), + np.sin(22.5 * np.pi / 180), + ], + 414: [ + -np.sin(22.5 * np.pi / 180), + np.cos(22.5 * np.pi / 180), + ], + 332: [ + np.cos(22.5 * np.pi / 180), + -np.sin(22.5 * np.pi / 180), + ], + 232: [ + np.sin(22.5 * np.pi / 180), + -np.cos(22.5 * np.pi / 180), + ], + 112: [ + np.cos(22.5 * np.pi / 180), + np.sin(22.5 * np.pi / 180), + ], + 212: [np.sin(22.5 * np.pi / 180), np.cos(22.5 * np.pi / 180)], + } + + def reset(self, return_info=False): + state, info = self.env.reset(info=True) + self.position = (0, 0) + self.goal_position = ( + np.array(self.direction_values[self.context["target_direction"]]) + * self.context["target_distance"] + ) + if return_info: + return state, info + else: + return state + + def step(self, action): + state, reward, done, info = self.env.step(action) + indices = STATE_INDICES[self.env.__class__.__name__] + new_position = np.array(list(self.position)) + np.array( + [state[indices[0]], state[indices[1]]] + ) + current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) + previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) + direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) + if self.env.__class__.__name__ == "CARLAnt": + # Since we can't set the forward reward to 0 here, we simply increase the reward range + direction_reward = direction_reward * 10 + augmented_reward = reward + direction_reward + self.position = new_position + return state, augmented_reward, done, info diff --git a/carl/envs/brax/walker_language_context_sampling.py b/carl/envs/brax/walker_language_context_sampling.py new file mode 100644 index 00000000..2d24bc52 --- /dev/null +++ b/carl/envs/brax/walker_language_context_sampling.py @@ -0,0 +1,42 @@ +import numpy as np +from carl.utils.types import Context, Contexts + + +def sample_walker_language_goals( + num_contexts, low=5, high=2500, normal=False, mean=25000, std=0.1 +): + directions = [ + 1, # north + 3, # south + 2, # east + 4, # west + 12, + 32, + 14, + 34, + 112, + 332, + 114, + 334, + 212, + 232, + 414, + 434, + ] + + sampled_contexts: Contexts = {} + + for i in range(num_contexts): + c: Context = {} + c["target_direction"] = np.random.choice(directions) + if normal: + c["target_distance"] = np.round( + min(max(np.random.normal(loc=mean, scale=std * mean), low), high), + decimals=2, + ) + else: + c["target_distance"] = np.round( + np.random.uniform(low=low, high=high), decimals=2 + ) + sampled_contexts[i] = c + return sampled_contexts diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index e0f094c1..7cd65bda 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ -255,7 +255,7 @@ def contexts(self, contexts: Contexts) -> None: k: self.fill_context_with_default(context=v) for k, v in contexts.items() } - def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type: ignore [override] + def reset(self, info=False, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type: ignore [override] """ Reset environment. @@ -269,7 +269,7 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type state State of environment after reset. info_dict : dict - Return also if return_info=True. + Return also if info=True. """ self.episode_counter += 1 @@ -286,7 +286,8 @@ def reset(self, **kwargs: Dict) -> Union[ObsType, tuple[ObsType, dict]]: # type state = _ret state = self.build_context_adaptive_state(state=state) ret = state - if return_info: + if info: + info_dict["context"] = self.context ret = state, info_dict return ret @@ -346,6 +347,7 @@ def step(self, action: Any) -> Tuple[Any, Any, bool, Dict]: """ # Step the environment state, reward, done, info = self.env.step(action) + info["context"] = self.context if not self.hide_context: # Scale context features diff --git a/test/test_language_goals.py b/test/test_language_goals.py new file mode 100644 index 00000000..d9a40de7 --- /dev/null +++ b/test/test_language_goals.py @@ -0,0 +1,164 @@ +import unittest + +from carl.envs.brax import ( + CARLAnt, + CARLHalfcheetah, + CARLFetch, + BraxWalkerGoalWrapper, + BraxLanguageWrapper, + sample_walker_language_goals, +) + +DIRECTIONS = [ + 1, # north + 3, # south + 2, # east + 4, # west + 12, + 32, + 14, + 34, + 112, + 332, + 114, + 334, + 212, + 232, + 414, + 434, +] + + +class TestGoalSampling(unittest.TestCase): + def test_uniform_sampling(self): + contexts = sample_walker_language_goals(10, low=4, high=200) + assert len(contexts.keys()) == 10 + assert "target_distance" in contexts[0].keys() + assert "target_direction" in contexts[0].keys() + assert all([contexts[i]["target_direction"] in DIRECTIONS for i in range(10)]) + assert all([contexts[i]["target_distance"] <= 200 for i in range(10)]) + assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) + + def test_normal_sampling(self): + contexts = sample_walker_language_goals(10, normal=True, low=4, high=200) + assert len(contexts.keys()) == 10 + assert "target_distance" in contexts[0].keys() + assert "target_direction" in contexts[0].keys() + assert all([contexts[i]["target_direction"] in DIRECTIONS for i in range(10)]) + assert all([contexts[i]["target_distance"] <= 200 for i in range(10)]) + assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) + + +class TestGoalWrapper(unittest.TestCase): + def test_reset(self): + contexts = sample_walker_language_goals(10, low=4, high=200) + env = CARLAnt(contexts=contexts) + wrapped_env = BraxWalkerGoalWrapper(env) + + assert wrapped_env.position is None + state = wrapped_env.reset() + assert state is not None + assert wrapped_env.position is not None + + state, info = wrapped_env.reset(return_info=True) + assert state is not None + assert info is not None + + env = CARLHalfcheetah(contexts=contexts) + wrapped_env = BraxWalkerGoalWrapper(env) + + assert wrapped_env.position is None + state = wrapped_env.reset() + assert state is not None + assert wrapped_env.position is not None + + state, info = wrapped_env.reset(return_info=True) + assert state is not None + assert info is not None + + def test_reward_scale(self): + contexts = sample_walker_language_goals(10, low=4, high=200) + env = CARLAnt(contexts=contexts) + wrapped_env = BraxWalkerGoalWrapper(env) + basic_env = CARLAnt() + + for _ in range(10): + wrapped_env.reset() + basic_env.reset() + for _ in range(10): + action = basic_env.action_space.sample() + _, wrapped_reward, _, _ = wrapped_env.step(action) + _, basic_reward, _, _ = basic_env.step(action) + assert wrapped_reward >= basic_reward - 0.01 + + contexts = sample_walker_language_goals(10, low=4, high=200) + env = CARLHalfcheetah(contexts=contexts) + wrapped_env = BraxWalkerGoalWrapper(env) + basic_env = CARLHalfcheetah() + + for _ in range(10): + wrapped_env.reset() + basic_env.reset() + for _ in range(10): + action = basic_env.action_space.sample() + _, wrapped_reward, _, _ = wrapped_env.step(action) + _, basic_reward, _, _ = basic_env.step(action) + assert wrapped_reward >= basic_reward - 0.01 + + +class TestLanguageWrapper(unittest.TestCase): + def test_reset(self) -> None: + env = CARLFetch() + wrapped_env = BraxLanguageWrapper(env) + state = wrapped_env.reset() + assert type(state) is dict + assert "env_state" in state.keys() + assert "goal" in state.keys() + assert type(state["goal"]) is str + assert str(wrapped_env.context["target_distance"]) in state["goal"] + assert str(wrapped_env.context["target_radius"]) in state["goal"] + state, info = wrapped_env.reset(return_info=True) + assert info is not None + assert type(state) is dict + + contexts = sample_walker_language_goals(10, low=4, high=200) + env = CARLAnt(contexts=contexts) + wrapped_env = BraxLanguageWrapper(env) + state = wrapped_env.reset() + assert type(state) is dict + assert "env_state" in state.keys() + assert "goal" in state.keys() + assert type(state["goal"]) is str + assert str(wrapped_env.context["target_distance"]) in state["goal"] + assert str(wrapped_env.context["target_direction"]) in state["goal"] + state, info = wrapped_env.reset(return_info=True) + assert info is not None + assert type(state) is dict + + def test_step(self): + contexts = sample_walker_language_goals(10, low=4, high=200) + env = CARLFetch(contexts=contexts) + wrapped_env = BraxLanguageWrapper(env) + wrapped_env.reset() + for _ in range(10): + action = wrapped_env.action_space.sample() + state, _, _, _ = wrapped_env.step(action) + assert type(state) is dict + assert "env_state" in state.keys() + assert "goal" in state.keys() + assert type(state["goal"]) is str + assert str(wrapped_env.context["target_distance"]) in state["goal"] + assert str(wrapped_env.context["target_radius"]) in state["goal"] + + env = CARLAnt(contexts=contexts) + wrapped_env = BraxLanguageWrapper(env) + wrapped_env.reset() + for _ in range(10): + action = wrapped_env.action_space.sample() + state, _, _, _ = wrapped_env.step(action) + assert type(state) is dict + assert "env_state" in state.keys() + assert "goal" in state.keys() + assert type(state["goal"]) is str + assert str(wrapped_env.context["target_distance"]) in state["goal"] + assert str(wrapped_env.context["target_direction"]) in state["goal"] From 97fddafd1fcf3b8156afefa9e19126dd2f6e2cdc Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Thu, 24 Nov 2022 08:31:45 -0800 Subject: [PATCH 02/44] added stopping --- carl/envs/brax/brax_walker_goal_wrapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index e37ad555..61d7db16 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -68,6 +68,7 @@ def reset(self, return_info=False): * self.context["target_distance"] ) if return_info: + info["success"] = 0 return state, info else: return state @@ -86,4 +87,9 @@ def step(self, action): direction_reward = direction_reward * 10 augmented_reward = reward + direction_reward self.position = new_position + if abs(current_distance_to_goal) <= 5: + done = True + info["success"] = 1 + else: + info["success"] = 0 return state, augmented_reward, done, info From 93be73ae8c0d87ecb45907749193b5aa282dd210 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 25 Nov 2022 07:30:45 -0800 Subject: [PATCH 03/44] changed reward structure --- carl/envs/brax/brax_walker_goal_wrapper.py | 6 +----- test/test_language_goals.py | 14 ++++---------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index 61d7db16..adba1afd 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -82,14 +82,10 @@ def step(self, action): current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) - if self.env.__class__.__name__ == "CARLAnt": - # Since we can't set the forward reward to 0 here, we simply increase the reward range - direction_reward = direction_reward * 10 - augmented_reward = reward + direction_reward self.position = new_position if abs(current_distance_to_goal) <= 5: done = True info["success"] = 1 else: info["success"] = 0 - return state, augmented_reward, done, info + return state, direction_reward, done, info diff --git a/test/test_language_goals.py b/test/test_language_goals.py index d9a40de7..d568f0a7 100644 --- a/test/test_language_goals.py +++ b/test/test_language_goals.py @@ -80,30 +80,24 @@ def test_reward_scale(self): contexts = sample_walker_language_goals(10, low=4, high=200) env = CARLAnt(contexts=contexts) wrapped_env = BraxWalkerGoalWrapper(env) - basic_env = CARLAnt() for _ in range(10): wrapped_env.reset() - basic_env.reset() for _ in range(10): - action = basic_env.action_space.sample() + action = wrapped_env.action_space.sample() _, wrapped_reward, _, _ = wrapped_env.step(action) - _, basic_reward, _, _ = basic_env.step(action) - assert wrapped_reward >= basic_reward - 0.01 + assert wrapped_reward >= 0 contexts = sample_walker_language_goals(10, low=4, high=200) env = CARLHalfcheetah(contexts=contexts) wrapped_env = BraxWalkerGoalWrapper(env) - basic_env = CARLHalfcheetah() for _ in range(10): wrapped_env.reset() - basic_env.reset() for _ in range(10): - action = basic_env.action_space.sample() + action = wrapped_env.action_space.sample() _, wrapped_reward, _, _ = wrapped_env.step(action) - _, basic_reward, _, _ = basic_env.step(action) - assert wrapped_reward >= basic_reward - 0.01 + assert wrapped_reward >= 0 class TestLanguageWrapper(unittest.TestCase): From a83a64167348bce88a219937e8233c66f80d8bb7 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 25 Nov 2022 07:51:59 -0800 Subject: [PATCH 04/44] added episode termination wrapper to walker envs --- carl/envs/brax/carl_ant.py | 3 ++- carl/envs/brax/carl_halfcheetah.py | 3 ++- carl/envs/brax/carl_humanoid.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index c53fd64f..367f0754 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -6,7 +6,7 @@ import brax import numpy as np from brax.envs.ant import _SYSTEM_CONFIG, Ant -from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from brax.envs.wrappers import GymWrapper, EpisodeWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder @@ -57,6 +57,7 @@ def __init__( ] = None, context_selector_kwargs: Optional[Dict] = None, ): + env = EpisodeWrapper(env, 1000, 1) if n_envs == 1: env = GymWrapper(env) else: diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index fea08688..5677890b 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -6,7 +6,7 @@ import brax import numpy as np from brax.envs.halfcheetah import _SYSTEM_CONFIG, Halfcheetah -from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from brax.envs.wrappers import GymWrapper, EpisodeWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder @@ -55,6 +55,7 @@ def __init__( ] = None, context_selector_kwargs: Optional[Dict] = None, ): + env = EpisodeWrapper(env, 1000, 1) if n_envs == 1: env = GymWrapper(env) else: diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 873473ca..659f0674 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -7,7 +7,7 @@ import numpy as np from brax import jumpy as jp from brax.envs.humanoid import _SYSTEM_CONFIG, Humanoid -from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper +from brax.envs.wrappers import GymWrapper, EpisodeWrapper, VectorGymWrapper, VectorWrapper from brax.physics import bodies from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict @@ -55,6 +55,7 @@ def __init__( ] = None, context_selector_kwargs: Optional[Dict] = None, ): + env = EpisodeWrapper(env, 1000, 1) if n_envs == 1: env = GymWrapper(env) else: From b316c2597afd6af60e750fbc3cc07a69b9e24df6 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Sat, 26 Nov 2022 05:34:03 -0800 Subject: [PATCH 05/44] fixed ob shapes for vector envs --- carl/envs/carl_env.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index 7cd65bda..69629522 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ -495,7 +495,10 @@ def build_observation_space( pass else: if env_lower_bounds is None and env_upper_bounds is None: - obs_dim = obs_shape[0] + if len(obs_shape)==2: + obs_dim = obs_shape[1] + else: + obs_dim = obs_shape[0] env_lower_bounds = -np.inf * np.ones(obs_dim) env_upper_bounds = np.inf * np.ones(obs_dim) @@ -546,9 +549,13 @@ def build_observation_space( high: Vector = np.concatenate( (np.array(env_upper_bounds), np.array(context_upper_bounds)) ) + if len(obs_shape) == 2: + low = np.vstack([[low] for _ in range(obs_shape[0])]) + high = np.vstack([[high] for _ in range(obs_shape[0])]) self.env.observation_space = spaces.Box( low=np.array(low), high=np.array(high), dtype=np.float32 ) + self.observation_space = ( self.env.observation_space ) # make sure it is the same object From ae814df8eef51650215b563e8c580c3833900215 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Thu, 6 Jul 2023 09:26:36 +0200 Subject: [PATCH 06/44] Goal functionality for pointmass and brax --- .../envs/brax/brax_target_language_wrapper.py | 33 --- carl/envs/brax/brax_walker_goal_wrapper.py | 75 ++++++- .../brax/walker_language_context_sampling.py | 42 ---- carl/envs/dmc/carl_dm_pointmass.py | 107 ++++++++++ carl/envs/dmc/dmc_tasks/pointmass.py | 196 ++++++++++++++++++ carl/envs/dmc/loader.py | 1 + .../data/screenshots/pointmass.jpg | Bin 0 -> 12577 bytes 7 files changed, 377 insertions(+), 77 deletions(-) delete mode 100644 carl/envs/brax/brax_target_language_wrapper.py delete mode 100644 carl/envs/brax/walker_language_context_sampling.py create mode 100644 carl/envs/dmc/carl_dm_pointmass.py create mode 100644 carl/envs/dmc/dmc_tasks/pointmass.py create mode 100644 docs/source/environments/data/screenshots/pointmass.jpg diff --git a/carl/envs/brax/brax_target_language_wrapper.py b/carl/envs/brax/brax_target_language_wrapper.py deleted file mode 100644 index aff7c8e2..00000000 --- a/carl/envs/brax/brax_target_language_wrapper.py +++ /dev/null @@ -1,33 +0,0 @@ -import gym - - -class BraxLanguageWrapper(gym.Wrapper): - """Translates the context features target distance and target radius into language""" - - def __init__(self, env) -> None: - super().__init__(env) - - def reset(self, return_info=False): - state, info = self.env.reset(info=True) - goal_str = self.get_goal_desc(info["context"]) - extended_state = {"env_state": state, "goal": goal_str} - if return_info: - return extended_state, info - else: - return extended_state - - def step(self, action): - state, reward, done, info = self.env.step(action) - goal_str = self.get_goal_desc(info["context"]) - extended_state = {"env_state": state, "goal": goal_str} - return extended_state, reward, done, info - - def get_goal_desc(self, context): - if "target_radius" in context.keys(): - target_distance = context["target_distance"] - target_radius = context["target_radius"] - return f"The distance to the goal is {target_distance} steps. Move within {target_radius} steps of the goal." - else: - target_distance = context["target_distance"] - target_direction = context["target_direction"] - return f"Move {target_distance} steps {target_direction}." diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index adba1afd..d26cb401 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -1,5 +1,6 @@ import gym import numpy as np +from carl.utils.types import Context, Contexts STATE_INDICES = { "CARLAnt": [13, 14], @@ -7,6 +8,44 @@ "CARLHalfcheetah": [14, 15], } +def sample_walker_language_goals( + num_contexts, low=5, high=2500, normal=False, mean=25000, std=0.1 +): + directions = [ + 1, # north + 3, # south + 2, # east + 4, # west + 12, + 32, + 14, + 34, + 112, + 332, + 114, + 334, + 212, + 232, + 414, + 434, + ] + + sampled_contexts: Contexts = {} + + for i in range(num_contexts): + c: Context = {} + c["target_direction"] = np.random.choice(directions) + if normal: + c["target_distance"] = np.round( + min(max(np.random.normal(loc=mean, scale=std * mean), low), high), + decimals=2, + ) + else: + c["target_distance"] = np.round( + np.random.uniform(low=low, high=high), decimals=2 + ) + sampled_contexts[i] = c + return sampled_contexts class BraxWalkerGoalWrapper(gym.Wrapper): """Adds a positional goal to brax walker envs""" @@ -74,11 +113,11 @@ def reset(self, return_info=False): return state def step(self, action): - state, reward, done, info = self.env.step(action) + state, _, done, info = self.env.step(action) indices = STATE_INDICES[self.env.__class__.__name__] new_position = np.array(list(self.position)) + np.array( [state[indices[0]], state[indices[1]]] - ) + ) current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) @@ -89,3 +128,35 @@ def step(self, action): else: info["success"] = 0 return state, direction_reward, done, info + + +class BraxLanguageWrapper(gym.Wrapper): + """Translates the context features target distance and target radius into language""" + + def __init__(self, env) -> None: + super().__init__(env) + + def reset(self, return_info=False): + state, info = self.env.reset(info=True) + goal_str = self.get_goal_desc(info["context"]) + extended_state = {"env_state": state, "goal": goal_str} + if return_info: + return extended_state, info + else: + return extended_state + + def step(self, action): + state, reward, done, info = self.env.step(action) + goal_str = self.get_goal_desc(info["context"]) + extended_state = {"env_state": state, "goal": goal_str} + return extended_state, reward, done, info + + def get_goal_desc(self, context): + if "target_radius" in context.keys(): + target_distance = context["target_distance"] + target_radius = context["target_radius"] + return f"The distance to the goal is {target_distance} steps. Move within {target_radius} steps of the goal." + else: + target_distance = context["target_distance"] + target_direction = context["target_direction"] + return f"Move {target_distance} steps {target_direction}." diff --git a/carl/envs/brax/walker_language_context_sampling.py b/carl/envs/brax/walker_language_context_sampling.py deleted file mode 100644 index 2d24bc52..00000000 --- a/carl/envs/brax/walker_language_context_sampling.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -from carl.utils.types import Context, Contexts - - -def sample_walker_language_goals( - num_contexts, low=5, high=2500, normal=False, mean=25000, std=0.1 -): - directions = [ - 1, # north - 3, # south - 2, # east - 4, # west - 12, - 32, - 14, - 34, - 112, - 332, - 114, - 334, - 212, - 232, - 414, - 434, - ] - - sampled_contexts: Contexts = {} - - for i in range(num_contexts): - c: Context = {} - c["target_direction"] = np.random.choice(directions) - if normal: - c["target_distance"] = np.round( - min(max(np.random.normal(loc=mean, scale=std * mean), low), high), - decimals=2, - ) - else: - c["target_distance"] = np.round( - np.random.uniform(low=low, high=high), decimals=2 - ) - sampled_contexts[i] = c - return sampled_contexts diff --git a/carl/envs/dmc/carl_dm_pointmass.py b/carl/envs/dmc/carl_dm_pointmass.py new file mode 100644 index 00000000..b6d8baaa --- /dev/null +++ b/carl/envs/dmc/carl_dm_pointmass.py @@ -0,0 +1,107 @@ +from typing import Dict, List, Optional, Union + +import numpy as np + +from carl.context.selection import AbstractSelector +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv +from carl.envs.dmc.dmc_tasks.fish import STEP_LIMIT # type: ignore +from carl.utils.trial_logger import TrialLogger +from carl.utils.types import Context, Contexts + +DEFAULT_CONTEXT = { + "gravity": -9.81, # Gravity is disabled via flag + "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) + "timestep": 0.004, # Seconds between updates + "joint_damping": 1.0, # Scaling factor for all joints + "joint_stiffness": 0.0, + "actuator_strength": 1, # Scaling factor for all actuators in the model + "density": 5000.0, + "viscosity": 0.0, + "geom_density": 1.0, # No effect, because no gravity + "wind_x": 0.0, + "wind_y": 0.0, + "wind_z": 0.0, + "mass": 0.3, + "starting_x": 0.0, + "starting_y": 0.0, + "target_x": 0.0, + "target_y": 0.0, + "area_size": 0.6, +} + +CONTEXT_BOUNDS = { + "gravity": (-np.inf, -0.1, float), + "friction_tangential": (0, np.inf, float), + "friction_torsional": (0, np.inf, float), + "friction_rolling": (0, np.inf, float), + "timestep": ( + 0.001, + 0.1, + float, + ), + "joint_damping": (0, np.inf, float), + "joint_stiffness": (0, np.inf, float), + "actuator_strength": (0, np.inf, float), + "density": (0, np.inf, float), + "viscosity": (0, np.inf, float), + "geom_density": (0, np.inf, float), + "wind_x": (-np.inf, np.inf, float), + "wind_y": (-np.inf, np.inf, float), + "wind_z": (-np.inf, np.inf, float), + "mass": (0, np.inf, float), + "starting_x": (0, np.inf, float), + "starting_y": (0, np.inf, float), + "target_x": (0, np.inf, float), + "target_y": (0, np.inf, float), + "area_size": (0, np.inf, float), +} + +CONTEXT_MASK = [ + "gravity", + "geom_density", + "wind_x", + "wind_y", + "wind_z", +] + + +class CARLDmcPointMassEnv(CARLDmcEnv): + def __init__( + self, + domain: str = "pointmass", + task: str = "easy_context", + contexts: Contexts = {}, + context_mask: Optional[List[str]] = [], + hide_context: bool = True, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Context] = DEFAULT_CONTEXT, + max_episode_length: int = STEP_LIMIT, + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type[AbstractSelector]] + ] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + super().__init__( + domain=domain, + task=task, + contexts=contexts, + context_mask=context_mask, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) diff --git a/carl/envs/dmc/dmc_tasks/pointmass.py b/carl/envs/dmc/dmc_tasks/pointmass.py new file mode 100644 index 00000000..f30f3f60 --- /dev/null +++ b/carl/envs/dmc/dmc_tasks/pointmass.py @@ -0,0 +1,196 @@ +# flake8: noqa: E501 +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Finger Domain.""" +from __future__ import annotations + +from typing import Any + +from multiprocessing.sharedctypes import Value + +import numpy as np +from dm_control.rl import control # type: ignore +from dm_control.suite.point_mass import ( # type: ignore + _DEFAULT_TIME_LIMIT, + SUITE, + Physics, + PointMass, + get_model_and_assets, +) + +from carl.envs.dmc.dmc_tasks.utils import adapt_context # type: ignore +from carl.utils.types import Context + + +def check_constraints( + mass, + starting_x, + starting_y, + target_x, + target_y, + area_size, +) -> None: + if starting_x >= area_size/2 or starting_y >= area_size/2: + raise ValueError( + f"The starting points are located outside of the grid. Choose a value lower than {area_size/2}." + ) + + if target_x >= area_size/2 or target_y >= area_size/2: + raise ValueError( + f"The target points are located outside of the grid. Choose a value lower than {area_size/2}." + ) + + +def get_pointmass_xml_string( + mass: float = 0.3, + starting_x: float = 0.0, + starting_y: float = 0.0, + target_x: float = 0.0, + target_y: float = 0.0, + area_size: float = 0.6, + **kwargs: Any, +) -> bytes: + check_constraints( + mass=mass, + starting_x=starting_x, + starting_y=starting_y, + target_x=target_x, + target_y=target_y, + area_size=area_size, + ) + + xml_string = f""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + """ + xml_string_bytes = xml_string.encode() + return xml_string_bytes + + +class ContextualPointMass(PointMass): + def initialize_episode(self, physics): + """Don't randomize joint positions in contextual setting.""" + if self._randomize_gains: + dir1 = self.random.randn(2) + dir1 /= np.linalg.norm(dir1) + # Find another actuation direction that is not 'too parallel' to dir1. + parallel = True + while parallel: + dir2 = self.random.randn(2) + dir2 /= np.linalg.norm(dir2) + parallel = abs(np.dot(dir1, dir2)) > 0.9 + physics.model.wrap_prm[[0, 1]] = dir1 + physics.model.wrap_prm[[2, 3]] = dir2 + super().initialize_episode(physics) + + +@SUITE.add("benchmarking") # type: ignore[misc] +def easy_context( + context: Context = {}, + context_mask: list = [], + time_limit: float = _DEFAULT_TIME_LIMIT, + random: np.random.RandomState | int | None = None, + environment_kwargs: dict | None = None, +) -> control.Environment: + """Returns the Spin task.""" + xml_string, assets = get_model_and_assets() + xml_string = get_pointmass_xml_string(**context) + if context != {}: + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) + physics = Physics.from_xml_string(xml_string, assets) + task = ContextualPointMass(randomize_gains=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, + task, + time_limit=time_limit, + **environment_kwargs, + ) + + +@SUITE.add("benchmarking") # type: ignore[misc] +def hard_context( + context: Context = {}, + context_mask: list = [], + time_limit: float = _DEFAULT_TIME_LIMIT, + random: np.random.RandomState | int | None = None, + environment_kwargs: dict | None = None, +) -> control.Environment: + """Returns the hard Turn task.""" + xml_string, assets = get_model_and_assets() + xml_string = get_pointmass_xml_string(**context) + if context != {}: + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) + physics = Physics.from_xml_string(xml_string, assets) + task = ContextualPointMass(randomize_gains=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, + task, + time_limit=time_limit, + **environment_kwargs, + ) diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 30c41738..829f5e55 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -10,6 +10,7 @@ fish, quadruped, walker, + pointmass, ) from carl.utils.types import Context diff --git a/docs/source/environments/data/screenshots/pointmass.jpg b/docs/source/environments/data/screenshots/pointmass.jpg new file mode 100644 index 0000000000000000000000000000000000000000..216eb985adb32d7ed5aed513a8d8500e5fa3f640 GIT binary patch literal 12577 zcmbVycRXC(*Y6pF(L2#c3nFSrq8okmUL$%>Q<8`}gM{c^5Md%D2+>2D&P?}buGkeZ%-?i4>t8KzKVHTj%)zZ-dAP@k6fPVmC5>N-AM1RZQ zCloBiq{M#<87V0VDLEMh1vwcxIRzCh6$K>?B{?}YJv9w29UVO#1r-A$105syo$l`- zkiR{l#AIMcI!ba%@YnyxLTCo)$p9QA8495Xi0C0udI+H%fP?EKf&5c|{}c!jl$eAR zMn+CS2{x#s1Bk%6iHM;jB*esE>rn7LKuk}Ln%4p&Qgbx8TUc6I+t|9ey19FJdU^Xl zeDpZrNnlXq(`Ql9&tqa!)6z3CUu0$H6uc=cdi$=pq_n!Gw(etn!>7-!ZS5VMSlsun zq2ZAqqhsSgC+6lC7MGS+R@Z*-?C$L!93CB?oc_fH0igfF0-yf^`#*4jM1T+xgCvFh z#RVY>0RN%%#3Z~@qztMiFehI|zH8xROlm2wt6IqUrA@b)o&5$WSOjF|F7Nz>_7AfE zJ75w2U&#I^u>XZ?0-%CIK;}W|0Ttjxun%YVUK(k|x+*_5<+NHFjkPLU%S2T(;)CnF z4xX@`1*QrnHtmfJ9z0<@h)KeC!1sPOiJ+{|q0VXqz(wNxz`Npov(ndS0^nQ$R+KA% z!(G}9Hv*7XuKi=yV{=*Mj2X4^-sIp3wN*081v~8$+w{KKenMm0^TzHb(bA{=cN=`% zqx#=2Mb5pz+ZVx~aks$gQ~V^N533)I2oiusO}Ldl^W%q&oX2AC)^)DxeX#3TTHtUt z)2`IYw{}m@2li_TKz7tN0hqk}W}N4=?Mm;qx?!~J=%1#oXaD}7w;k0gaT0HbnjuPZ zj9jM*Z)6;I!K)p?kD7}%=#_uPKX`ij1ipKz0i2FRNR`@|b>`TD9d$t}uCf)bH_m(E z6i)!2nY<$a1I`QhW2RtYxV19@Xnj{n05IRM7u&*zWMRKj@}IH5_y21USbcKa_J?Lv zk1g24Hs3LF66bVmYstT5JLTp-y&pdG zoX9Zp?7J)u8P->}ZL9Vy~smvv4jh)}RaI62l+!cAk&RDy^i~kcz%jzA3#V;}y%n#tO90>;p|% z5C1%-xG8Ir>T}HyI8BQsan|8QmM^@TIv@J#O7srmi)(W|_yg<<lw+v$$@@aQLHPhoR z6(A=tYs|tj`{nMd*NKt@fX5v0!w@0Y^Zm#2gMhjKv9FJfZFy?5e^ACaS<+C_XZSVW z=VxV|b=~!(H>BgXhh<*{1&5S&RhJXr-JuGozBQ%I^z$dbR#!#RpHxW{#om<_)}Xr3bo(adv5kQK ztL-MywRenOoFt#5=IJ$t)&2gJ7q(E}PXLr$!7Y(+iIS_n?UNg2z}eWM#7^*_Pt~{r z4z=_q?4g~gWc;p9fB7X!>Pexa{uRyEwp%^Wt@;(s?#j-Yq{f+7fYYt`z4 ztLZm;h~jer8mY9CVMnW!Irn(2MhA#>jfdFO{%o)*oAX6jze4-v_*CRW%|}2KkEH=;ah5Ioig2TK1h&D1T{gw~qu(1jyP%yU zTn;g^s!v0QId|Ugt>)F#+ggURz$&14#_qyR|5u_g-P(QbKzpyk>0K#z=RkoYjB}e*gy)I{m5XEZ zP4y&OI_Lwbliu1Z3WzD`{CD>IN067_O2{h7pT2ylqo%i1HU2n=)g?IeBGq zZp70GB$r%e6=J&!?GS(#1{Xc!OHQNY!s%|0FGNn6LFxmH0_7S~=&jK-EodL9{F1i_ z++fk81hT0fCO0(aSsn>s=jB?P+r3nP>|&P?YoX7#Kbhse*wbA~?q}ea`k3-AQAR7k z8b^U-gCnqPTL`YO#Qe``zpu`;K2j95K^~MNIa$ioGXj^>B@7>9q&34z&_bzXojc8y=%v=Gq>VPHNvT>QQ%9Ap35)l^a%fqGi)6 z8!zs&V;Z*AePWAu+3yLUo7-XYe+Q2_B>*>H?^!$CzjL6zCpD*Jz@zXW8b|i|RE)%K zBjhuw%m)QysP0M~z-uZDd8YbXm`G5c)7W@at?__I%M#6QiDqphMe|?XgXTq{B^yOF zKeo1kdXWI^vt!w+KGOew^8IP`gIJ;6R`E>&Fs(9>m$6qpb%gFoIGQ!&-|d`vk=Nrp zGU!L9I9yx716Ma}tGZlkdb^9~x%e&{+PxEDirr~0Xsr^xWOa#Sv%6~&Bg_oQQSL$d zosx#KI8--qP^EQf?QY!FI=zwSbk7Qu+)f&>!*cH& zE5FCd{851$w3>iMuxQzSoWrFi{l=wh=r0?FN@xV_TJa`QZ8Q;n#034Bh$8W z9+L`9C=U!71Pb&#CNafuYXPtUN0(iU`j2~y3H0tgAIwT7-lzJp<{#6pAR+xfsmyNa zQvzLKjy`RX!yn07PKQO4x-^NuwL6|TjX-l#5tCT;nS5RHhUDtn!LpaSnZ^N)gYl<_ z7+9YMqtOFJxFiW;=7@toPo4lAtkNYtknt7Wz;qr%MixETIzxR&j1^K`gJMZ_JyJCi zuLw9-B@`<_Qc>A5WFcvy9gB}O9GOj+ee)cfci8KD?KOUKHgP-27!+{mbR7Vuj$NPT%uY$n({g?VXyIrsh{T^L`$F z$Iy&v5dbTpvmaY?@_8(xB}rLHK_lv|Ka#P+EwjeRt#W|t)EJ6BC`BmI(NCi}nYguR zM@v$_n_5yqz3=Fr>`= z#Ky#T3w;s1&pd^lYylTBB>m>1|1>CAR|x>_29_%Xi?>LWFu(J3YDXbk{Ezr~H22-e zBZN4|J3w3(JJk62R^IN_&(}!qb*X=tiy}@$u5i(EQ5(}|VyrGJo2Rg-SEUP!w5M+P zr%Fn~49PK80$yTyax5}PwbZq5adN9STmdB`)!{urGZVR))y@lfc}JSgE`{v~-gow-gi{H>phqa;yHl!DexGOY;5+m+COl=LHbw86qN;P} z>`-ad_YLyy;DsV0wj7vmp9@Dwy*uut8fe*H?*-&1mNvzmKRv(VdE3=^Dnb?4Uq{sN zDRs8rZ{th2czY%B;vrX`ktzcFU9A7-f(@g$WEWgi9MQFn`2I&2GHPbTfuwi!w2_kk zAg3Qb#p$O-BP)wxHCmdodw$m`rTVefjeM!yC%Eq`xgf{wkxQOm*R!+I+p`Tiq?%3A zaLwFVYY48<1pEi1@=zM8ZC_1weC5R*GpP{i4jN38@Y7WS&{f^R;drEYop0~Ok!v=ki&rxa2j$%L z&y$q5>f>H~%pdFiG^L8`79?Yg2koC^GF;q(an@}J9asH&A972UJ8wo8A*mU~+{FtNJK#;4GJ1W2yzk}>*b0Kig(HTm{9|Ce z?mZ#z)9~E1vC;6@(yC`(>Cav~pHUHc)h4j*KNAZi=&L=*I;pzq$FwP`R4sd6JavOJ z6=o!2L(hCu-d))`*Ed;u{L!rcuwdM?D95K(Q-xCekWaWyKZQs5hj#1?SC(QmGv(MQ z_C`=4<1Fj;IA-KM zZyR)kdp?|j{TCAe5{Eg2=ev*M9?KsfHmK=i=~YceH>eRuC-!m4W;Uk#k60 z&+E2?d3}uNWX6zIyK-NVnl`onFGVTl#ddW=$o%~+z0jA)FDK(OcG83w_ z3Vn54N0l3OFP^FUhM4sa*C$*4yytBFF)1Uv-i7!N_BgCIN_3|ZF^HoQ zS~2%BuN2}r|I`;|WSDipB&osg-YJUoL0DiN>`@{JY95A6^k=l+C&l>S#C?cB_4kKt zqrbFzzGMwoHGeU`%ilJ0Y$ zYw3HsaWmXn3$6t>yPQS+<)uH_^mvC%AVX%KO--Nb8iM3U&;3nK$b?b~dMLSv%pSo* zMRmB39gQN3>+2T!^F!@}DWm)Yae3q5k4%PDoawTK+orfaEQnU-Mr=t zujdR`OPrl3ba%7@lm~P_1n2q)LmKZS$NE@GCzCCd6DeK&M~99!z90)(mT-Pe0IX+h z2U0vC<^iQd7IxdhJjnnl3Bbelp2vq313bdO28f&8%gE`aN6`vRQEMrWWmjt<6Y0%d zSH=TaEt8`%FojZ?;qk(>-j+l{rXq-#Of;X9u?+_W5IzQ5Q`NY06g^rej|v{4t~Bb5K*508P&g!aiU6DVU!lhi~p8%Y#g4H#9;H)&dEAbv& zvPyf?XuceBu_>%ztCpM&Gj_Q!7Gc0t^+hJN>;)szKO*ctnOwh|fGa9NM$j^2!g9I` z(##4#uywqN3``GGfk7F^dm>X&Tzdx?K=Y#sRTqx~z0(n0GE3fAcd( z`&h&60x+UoJEE$t*FKUtr2D6$L;(H3n5@A#9?{Lb8#}$w8aD3bd!>b`K0rZ>y8&;e zMIz*W`0K9JH_I-$=QFxQt&_LEy`NiP5cC^te4K{sQ_?b2u6-)`;r#P6$_6SHwiVeU zCD7bU0E`sW9|)x|6eh4N22g+EJgqR=eoO3G-BViCYDi{RtXQ>{kJ>J3xT{A-$qz(5 zN}}myzbs&0G&wC8Z82?u>$Xtk6FKH~rrPj>z0l4wN`n zR!q1T8WLMTgyxn{DhKos3U2a)&ZJA-b9NEhI6XgDmxAd@>u+5mEAI5&ACfo zLw0-9dxoT%75w`RBO2Y_tPTw%*nGTa0TWW*Cre4cOZp(k*qm?C<-YVW9(Cd3H2&%V zRHnlB9;N#Y^V|6%X;YARl!SNR>KCSDqB*+^;@MmerrRWOX?@NgGenDa=U;v7ik-Sa zGuN^RIZ%=xe8|GPiyEZrXQ%-9VSV_p`*MqJp^{Qqxq?+ub&lo|iI*HzSI??H0G7DL zc&;`!)`e7>CoM$nDWRoE7K!{WmvmEds_DqRw=4{L?glNcG`QDUB6&*wSeFhW4&siR zq#5mo7H%4$MPqVUBd0ONg;-JO3X$NXZ#KE;HUj))KW%pI}6lyXNPI z?p1v)Wkt{5oM;d7$Teo+%|JTskqWpj)bUh)dErGuvk}iV<&wLT^zLLWKyAq_?(s+K zfS#9yma@k3+fr!*=wO%JJkQUR^xex+(qvCPHy%53hLVO#YYx~X>M&EJt0CaB?sFFn zSSChaGf0@b!z=Pd57_WJb{)}gI`HDi@8Zr{WpvMctS0Ko)KNK-*&Cf9A4|qb>Qz7j zWk!er$Ws5I@B(9zc12#LCm2}xbU7b~fVp6CTGO$2lh}$zn{bSWzB|Zd=)4lEgnv}8 z`)-NQ1uARw5%cirSv2j>whp3ugU3l)ZU?Dy)DjydN-7mSERVQn#-@~--20hT&U@sI zE_MmPoc1B{%mD!)uGkzu#5c}MpGiL{`ot|Tq65j&trrugsjDwVgdyYN}{Z zKfCs%yg`|NU^^5Hz0O}X`p>uylU17+q3v;J(#N%(Rr&O5qH#xBg3SZ~>lvb=OaR8F z;gcz$DisZ07un~!V`4I&r&IW9SjKIgPB%+Hm+p4}zHyg3m!UY%FzJju&s-qiUzS$Nt|CJ0P#ui?UdD~ z^!z!aGg8#pxh(2_8yH@QV#KqdI%Nq!N}T3-ApxLJsr0x&wM8Fps8k#~q2^wkyJGX{ zSDT)voeSez^1cp<~nk?bWltuWASYKX=@5j7n?L26@vu zo(y$g{qKAN@S2+m061SX7?#I@Y?n9)diQr&mCbQ*&F#tfUi)jYPFL?tPzHw`qc+k} z?|*AeBnNesdRJXxH(qM#t_BY+&%a4TCpB z?J)h@*B69Ze{jTNb?Cao1K+*A20x~iIJWoz|I1vVe>q6Ift^<$?g-MxL%Z=z1mMTX z5x#L7|LZo$U5|5Vc+6*1t3?q3n8^kK{fE1#f4JKMQW!h`;FP_#>18oO3w>28s0qjn-lkC=*0-IlREa zQ0_fF4;zNWV!2LwD%*qpA*p{t zm;AM*EkffiNTR^ISJ_;ElN)z5apfGMq7aSDQNu%b-(*l`N6)H=$Qhp$!qMl&OR9me zf2#J{{mM-Bd-p!jo;f!R!tH*62pyY=UD+Gm9SorBoR@ zrUR-W)`BS@1r_CmHzmKhpas@0)>FUdxR)+TbMSbv2i&bQe3j(^uJ@v+RM=E^#gt1S{&lID3Ei+%6)CYn5%yp8$ld9zK^NSDBL=Kz#wLA+h#@ zl*tx5oe|d5FI0|jZzgyaAqxpS1faX@;6!S6hihO+x_CZc(kcHXA?2ee8m0`fUOkmTGfpX0`T;U4w$;Kf**Bg*Y44}BNcBI=AXFvf>W7n zs{c#^8@$ooTmje4Y(J2vYSkfcvRVT_rnZt|S(P{!nD8i^IqDTsV8V=V9qY>UpR7nM zne-&ck)m47ifAbGPRv%2`L3`ITdO01^Adl|;pJb~{>+$LrcTP*V`Ak7g83FylkNta zt*CCGv55zYc(*g&BIKst1gx4PS$;I{2mMCY%P|4}4x_s3hiZz&Fmqfr~SrD5*>xHRcNo)z(`uZjVjnW5c!m zZg7r1FjEt<8)D~uv{FREJPy}I^ZJB}Q1T=s&15?SNNLS;rwR?Mx^;Gdcrz5n2b_zB`qe)gU1sj$cRp$+>!e1yE zqV8U7l`Fd|@L9*Ke@=84Oi>P9U3RthZJUY7Wjwt@yheGyM{AlkIC-r9_Ex-of6T_}!)l8%poy#K-Vv ztEeBMZ9mWM8+9t@_qbNN2?SXU4_9nDzy0XZZ(guw(D8?gM%pSktxGeyq@j%Ufv&i8 z(Y8UGwqj-YQIxM*!@m5HS$y8xmks-_`mdv?9i;i%D0afl0(>V`Un;G6o6xH#j(A^h z7;~80FY;MZSNtr_S-c02ROVnG%`Vox|Cp&kV-4RMD62lu!N%|edek4cweVoJJR6E_ z+tOP)FCkBrXsb94GdZ@%u5LY-Cq`j+@mggX-P?NrUU(9`_2yR!4j9#+-_C%FMZq!yOP%hN&pMSiqa`Xd1U*GOR0F+^1b4d9P zs6qQ0!ZZAo z2E%5!8Uk>e`VK{ZdT*Pl!?E6FFfr^6&iklB_7e=u6k7>hKpF98Z*uK>0?sdkfi?tq z*fgd4!)lnGH0pyYTMo6g#5uQtrDf~-N&q~xP_yP2m~S}(Mx013l^Sq~jcaF!L!`=_ z{0n@zFZ|3)yH@CD904Gon%Sp_!{gbNFu~Uz?VL%Rw?oc zI@MEA-&DLcn}4hUrR+B1-&lxG(#4ycrk8i$Lsdwh>WmznoYbl)om=cMVDStFnlks4 zF4vryN0BqwT@_r2rwVdzDy|#ORU^bp9JR>Wbo7gJRp})oo)N_VUp4lS+Yhx+H4A?U z8LXk=t~M?4Y2^la(oq8UAdIK~SnrR@${;8kNuJ7<@a-2GY#vGCgq=+C?_5 z&$WxlG;rE*@jH5KX*?wULxrJ+u=x;4fz0YQVrMM@+#Koh!Ux}+wx8n#ia_U5hj!R5}!KN#aR>gof6H>%SefiZZ+3$R$DmK;aLP<^!JLy~)jAlW4Rjfedlu_J zyu4f6Yld&UL;zHaTQg#>vo3q7%v`VG31Ua+XSi0omZ}MAz16#HhKNaCDDo!8jmvI$ zO6$Ke@~)1tDE=hA(02BN(*7Og&h?YJQ}~){CYJbCc!!RLTPY>K2Os(q$4%KS-t9mD` z=K`%aEy%HA7v03_sy93hHonB@i4o^z#RPEF8F>>6wrn9-^T{sn*8V~gfMGY#roW?j z%7=3qG4`InIJ+wmbr6`nYOp8<$99-rv);Ypr(mM%Td4-G4*foeOXf2UI1_cyuq@HWlc%YSLZO=7EYLzcq%67hiEr3c>taT0(D zFeXA90OAuEDhdE0jbJzl+XosbaZF?;nQMp-!m5edGGw>o)(~W6&9f1K7+TZq_UW)R zR{o5I?NW8E4@xEJybikh#Mn9{^(2Bk96<^z0{r3RO^cUGP-710GB}!ohJCoXqIKQX+oQi(|4~%-WNSOu33+L+ zz#bu=%b@~Itc9?lbSQRnO-KCBl#v zxGiz_#Gtad!xnqI)#o1x#bP~W6u!3fR}_+<%o>T%PZ$ym5t;%Jh9G*4X~6jIRbWXi zP^Q@wP&C$~AeJ*z%u5HhfmvfnyfFkp19-eHt>iii{{a2@)#2BsKFu3qV})=wknIk- zFy995^c+Z$m$mafCTkkKIVq^#NYi>L0a(2rb*bc|@M`{6&adp$PL!@jk{N>WN~uP* z=5?KM&8&XtZXt1(H;$Xu#}m?WaYqlmJ-+Iz4~tRKyh?$k+_~JeFqzo-rZ|zR9vT|} zd6pEE8kM<)pvx?VysY*Fl!PHLTYXQ~z1Y-2##}rTleQ_52bgA}%MP#+=(wcP?k#28 zxx6o*JqtBdVAJ!RwIH(vgaHgm%XNe>cP&s0U>JwcoR=O5hEsmYA@P-yAq$f56NZcc zAY>{`gm-mcO|uniLn>+Wlq?3hCCvKN9nE+TxC}LHbu_-jZZ(1agbfju9Xfyz00V7& zq+OQ%J1yZ=jxF||*@>jZloXzV?XjLjZr5EIu0F(&TN<^ZFaO%IB#Od>w-iHKN~UOp z<`qESt_tgdq+z|ub!95`+MR1r(kn%PM$iCJVr|yI)8)d~rBkybN47 zWDBe15nCrd@c~uM9r?aPtr7Y3bgGA?$gW$d7G%OeDFh^FYbr!-rQt2Wlmpl~N049u zB0~~D$-KfAc>DxIjKf$FX$h0I6Xghm85sW&+3BTJ<(Ov3RWE(r2bq7#t|SwVe0?Jp zP@@KT*)YVDTL{vbAYcxE35C0HhW}jgFIq$4Rd9oY@pcff*#ufhi`eBl0{J#VN1ft* znloagUG@gZ*#tnJy$nW*37;#5#vUim>}+CP5S)_tL3dbSaReChM^OKVm!^otk#sE` zK{V?!n$6TY0YMEDSN@9?{u?wdLPj5b<}m(kZlB@Vbt~yD&)WeCSh)i5TNIQ~vfu%t z0r%E$G!p}oi6)&%4B%ZuWKFmxQW`*aSxWAHw`wfdEHwTN3=GcTz9l3bMeBOf-hTG9 zQk#eKTBNzDOYDG@q$i|-320Rk0d#>`JPQg329N(cR8Wq8W=G@z83x9TS3TBM`G2fJ z=AM@}(9#;tyR;c2xA!5!L6uBOfG>*P0k3qtsk5Ql_ToBqs&qinwWzdkoi6~prnQTT z6VgR52t2i9fT8JhmK%lO4yJ}tf#Ej$QRPLIt(<-auV*3^x&!S#Y;Th(NUxy(N-F3o zqbau#FDqRKu15JIqD5BH5VYVPX5*Oy$TT{0Lk|Vz{Nv{ z3(^LkZ-(*qSdGO8ilaT@<^#}?9}u1gxdRPpIOOIx|1kmcLRgNa-O&KdtNvvM6drxMY)tR=AAVE?K$pEdM z!rm8i2N=qn76rbQpmlJ^qV`#=>bfS?a@{5im* zzo#}7(MwE0Dw0g84}+{DC?KGs(qv^Cf_S4-h$5sB+^=U(LVigS1wjxTg%AWaADUge z9AIT)xk-vvoSZ|QM^q4i_2R$kskilRUysdzltRphC|yqSHFH-b13>XQAbYbMP%_js zCDOdR9W!?9Fbm+oa2ZWO zMgbT*W#F{OdMHqAc#L+=mR#H0R~7c29QS3K$(CB8OSO(QGYyZfsPCKWP zVQ$7}zySP8PT4Q5y2(xBS!owE=-5zeEiQXs@tM$?d9t040BHTca~4b;-?a6yW!}d1Xr<4v8Gyp{l6#+k5X6>IX0@1=5hlVgXDFf7lkCdqC`Z@u$ub5k&5w8-lqBj93$2T||T*|v_Nkr9xR%bZ1kHJsd#-F5Nh94Qdwrcd2wz#xlS6^rn)en5hXXY z4Ek5^jn;4QSwF;9bX=wKE$Rh*3CpCixZmyh+qGpXtC#+EX)25UfetaPgzr(j#N)-_ zHcki5COh^xR1Y`m?6fx2Rk=*~xXM*!nO5cE!dAH&Zpp1IZbN$#0bY(oS)B`?;zF&H zLvB~&K=(?N0302lTJuM~Uh0nVJ=HF-YgFPKNF<3Cf&K=04gUDeI^+O#Eq+?!;|si9 zA$*(HXr1@KudtCf%Hbp+nfB-z+=li&9qOJs$W>4z8vm)o4o6h>T8++DO%9SzEBiOo z!KfDX$>6`0;-;0fzs}8Ha4?%V*pU%QHoD-qLQOO3i?9o&wndd zcb|S{ov3x#18FD^62VIIPWQ+sO$*a6hJnT9X1SwGCzLOJ*KfZgI|a9i--kdM;PkIH zCqlE68j3WhgN6zeDc4I}h3Ft+`m5@YZ#+9>i2h3uRQo(3Z6z4$eoFm;-(;#=29V$U c$+UIW6k491r28dxXnhk$x{4weDBl>h($ literal 0 HcmV?d00001 From 2bf0c21f7169b16dc8fed90bafc56088fc831d48 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Tue, 11 Jul 2023 14:06:01 +0200 Subject: [PATCH 07/44] fix pm starting position --- carl/envs/dmc/carl_dm_pointmass.py | 10 +++- carl/envs/dmc/dmc_tasks/pointmass.py | 76 ++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/carl/envs/dmc/carl_dm_pointmass.py b/carl/envs/dmc/carl_dm_pointmass.py index b6d8baaa..5e7769cf 100644 --- a/carl/envs/dmc/carl_dm_pointmass.py +++ b/carl/envs/dmc/carl_dm_pointmass.py @@ -8,6 +8,7 @@ from carl.utils.trial_logger import TrialLogger from carl.utils.types import Context, Contexts + DEFAULT_CONTEXT = { "gravity": -9.81, # Gravity is disabled via flag "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) @@ -24,8 +25,8 @@ "wind_y": 0.0, "wind_z": 0.0, "mass": 0.3, - "starting_x": 0.0, - "starting_y": 0.0, + "starting_x": 0.14, + "starting_y": 0.14, "target_x": 0.0, "target_y": 0.0, "area_size": 0.6, @@ -105,3 +106,8 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) + + def _update_context(self) -> None: + super()._update_context() + self.env.env.task.starting_x = self.context["starting_x"] + self.env.env.task.starting_y = self.context["starting_y"] diff --git a/carl/envs/dmc/dmc_tasks/pointmass.py b/carl/envs/dmc/dmc_tasks/pointmass.py index f30f3f60..2cea2c9a 100644 --- a/carl/envs/dmc/dmc_tasks/pointmass.py +++ b/carl/envs/dmc/dmc_tasks/pointmass.py @@ -30,6 +30,7 @@ PointMass, get_model_and_assets, ) +from dm_control.mujoco.wrapper import mjbindings from carl.envs.dmc.dmc_tasks.utils import adapt_context # type: ignore from carl.utils.types import Context @@ -43,14 +44,14 @@ def check_constraints( target_y, area_size, ) -> None: - if starting_x >= area_size/2 or starting_y >= area_size/2: + if starting_x >= area_size/4 or starting_y >= area_size/4 or starting_x <= -area_size/4 or starting_y <= -area_size/4: raise ValueError( - f"The starting points are located outside of the grid. Choose a value lower than {area_size/2}." + f"The starting points are located outside of the grid. Choose a value lower than {area_size/4}." ) - if target_x >= area_size/2 or target_y >= area_size/2: + if target_x >= area_size/4 or target_y >= area_size/4 or target_x <= -area_size/4 or target_y <= -area_size/4: raise ValueError( - f"The target points are located outside of the grid. Choose a value lower than {area_size/2}." + f"The target points are located outside of the grid. Choose a value lower than {area_size/4}." ) @@ -126,10 +127,30 @@ def get_pointmass_xml_string( xml_string_bytes = xml_string.encode() return xml_string_bytes +def random_limited_quaternion(random, limit): + """Generates a random quaternion limited to the specified rotations.""" + axis = random.randn(3) + axis /= np.linalg.norm(axis) + angle = random.rand() * limit + + quaternion = np.zeros(4) + mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle) + + return quaternion class ContextualPointMass(PointMass): + starting_x: float = 0.2 + starting_y: float = 0.2 def initialize_episode(self, physics): - """Don't randomize joint positions in contextual setting.""" + """Sets the state of the environment at the start of each episode. + + If _randomize_gains is True, the relationship between the controls and + the joints is randomized, so that each control actuates a random linear + combination of joints. + + Args: + physics: An instance of `mujoco.Physics`. + """ if self._randomize_gains: dir1 = self.random.randn(2) dir1 /= np.linalg.norm(dir1) @@ -142,6 +163,51 @@ def initialize_episode(self, physics): physics.model.wrap_prm[[0, 1]] = dir1 physics.model.wrap_prm[[2, 3]] = dir2 super().initialize_episode(physics) + self.randomize_limited_and_rotational_joints(physics, self.random) + + def randomize_limited_and_rotational_joints(self, physics, random=None): + random = random or np.random + + hinge = mjbindings.enums.mjtJoint.mjJNT_HINGE + slide = mjbindings.enums.mjtJoint.mjJNT_SLIDE + ball = mjbindings.enums.mjtJoint.mjJNT_BALL + free = mjbindings.enums.mjtJoint.mjJNT_FREE + + qpos = physics.named.data.qpos + + for joint_id in range(physics.model.njnt): + joint_name = physics.model.id2name(joint_id, 'joint') + joint_type = physics.model.jnt_type[joint_id] + is_limited = physics.model.jnt_limited[joint_id] + range_min, range_max = physics.model.jnt_range[joint_id] + + if is_limited: + if joint_type == hinge or joint_type == slide: + if 'root_x' in joint_name: + qpos[joint_name] = self.starting_x + elif 'root_y' in joint_name: + qpos[joint_name] = self.starting_y + else: + qpos[joint_name] = random.uniform(range_min, range_max) + + elif joint_type == ball: + qpos[joint_name] = random_limited_quaternion(random, range_max) + + else: + if joint_type == hinge: + qpos[joint_name] = random.uniform(-np.pi, np.pi) + + elif joint_type == ball: + quat = random.randn(4) + quat /= np.linalg.norm(quat) + qpos[joint_name] = quat + + elif joint_type == free: + # this should be random.randn, but changing it now could significantly + # affect benchmark results. + quat = random.rand(4) + quat /= np.linalg.norm(quat) + qpos[joint_name][3:] = quat @SUITE.add("benchmarking") # type: ignore[misc] From cfa6be5c8394039506a7db73769d9d2665a8bc46 Mon Sep 17 00:00:00 2001 From: benjamc Date: Thu, 10 Aug 2023 10:41:49 +0200 Subject: [PATCH 08/44] Fix carracing demo --- examples/demo_carracing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/demo_carracing.py b/examples/demo_carracing.py index d412b413..9a87c7d6 100644 --- a/examples/demo_carracing.py +++ b/examples/demo_carracing.py @@ -43,6 +43,7 @@ def register_input(): a[2] = 0 contexts = {i: {"VEHICLE_ID": i} for i in range(len(VEHICLE_NAMES))} + CARLVehicleRacing.render_mode = "human" env = CARLVehicleRacing(contexts=contexts) record_video = False @@ -62,14 +63,14 @@ def register_input(): restart = False while True: register_input() - s, r, done, info = env.step(a) + s, r, truncated, terminated, info = env.step(a) time.sleep(0.025) total_reward += r - if steps % 200 == 0 or done: + if steps % 200 == 0 or terminated or truncated: print("\naction " + str(["{:+0.2f}".format(x) for x in a])) print("step {} total_reward {:+0.2f}".format(steps, total_reward)) steps += 1 - isopen = env.render() - if done or restart or not isopen: + env.render() + if terminated or truncated or restart or not isopen: break env.close() From 46618ee89e84951615c7804754b8cb8235a11e41 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 11:15:07 +0100 Subject: [PATCH 09/44] move goal wrapping to main brax env --- carl/envs/brax/__init__.py | 1 + carl/envs/brax/brax_walker_goal_wrapper.py | 53 +++---- carl/envs/brax/carl_ant.py | 9 +- carl/envs/brax/carl_brax_env.py | 11 ++ carl/envs/brax/carl_halfcheetah.py | 9 +- carl/envs/brax/carl_hopper.py | 9 +- carl/envs/brax/carl_humanoid.py | 9 +- carl/envs/brax/carl_pusher.py | 7 + carl/envs/brax/carl_walker2d.py | 9 +- examples/brax_with_goals.ipynb | 158 +++++++++++++++++++++ 10 files changed, 244 insertions(+), 31 deletions(-) create mode 100644 examples/brax_with_goals.ipynb diff --git a/carl/envs/brax/__init__.py b/carl/envs/brax/__init__.py index ed4b23ed..579f4a03 100644 --- a/carl/envs/brax/__init__.py +++ b/carl/envs/brax/__init__.py @@ -10,6 +10,7 @@ from carl.envs.brax.carl_reacher import CARLBraxReacher from carl.envs.brax.carl_walker2d import CARLBraxWalker2d + __all__ = [ "CARLBraxAnt", "CARLBraxHalfcheetah", diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index d26cb401..c2c53f48 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -1,17 +1,34 @@ import gym import numpy as np -from carl.utils.types import Context, Contexts STATE_INDICES = { "CARLAnt": [13, 14], "CARLHumanoid": [22, 23], "CARLHalfcheetah": [14, 15], + "CARLHopper": [5, 6], + "CARLWalker2d": [8, 9], } -def sample_walker_language_goals( - num_contexts, low=5, high=2500, normal=False, mean=25000, std=0.1 -): - directions = [ +DIRECTION_NAMES = { + 1: "north", + 3: "south", + 2: "east", + 4: "west", + 12: "north east", + 32: "south east", + 14: "north west", + 34: "south west", + 112: "north north east", + 332: "south south east", + 114: "north north west", + 334: "south south west", + 212: "east north east", + 232: "east south east", + 414: "west north west", + 434: "west south west", +} + +directions = [ 1, # north 3, # south 2, # east @@ -30,23 +47,6 @@ def sample_walker_language_goals( 434, ] - sampled_contexts: Contexts = {} - - for i in range(num_contexts): - c: Context = {} - c["target_direction"] = np.random.choice(directions) - if normal: - c["target_distance"] = np.round( - min(max(np.random.normal(loc=mean, scale=std * mean), low), high), - decimals=2, - ) - else: - c["target_distance"] = np.round( - np.random.uniform(low=low, high=high), decimals=2 - ) - sampled_contexts[i] = c - return sampled_contexts - class BraxWalkerGoalWrapper(gym.Wrapper): """Adds a positional goal to brax walker envs""" @@ -55,6 +55,8 @@ def __init__(self, env) -> None: if ( self.env.__class__.__name__ == "CARLHumanoid" or self.env.__class__.__name__ == "CARLHalfcheetah" + or self.env.__class__.__name__ == "CARLHopper" + or self.env.__class__.__name__ == "CARLWalker2d" ): self.env._forward_reward_weight = 0 self.position = None @@ -117,7 +119,7 @@ def step(self, action): indices = STATE_INDICES[self.env.__class__.__name__] new_position = np.array(list(self.position)) + np.array( [state[indices[0]], state[indices[1]]] - ) + ) * self.env.env.sys.config.dt current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) @@ -129,7 +131,6 @@ def step(self, action): info["success"] = 0 return state, direction_reward, done, info - class BraxLanguageWrapper(gym.Wrapper): """Translates the context features target distance and target radius into language""" @@ -155,8 +156,8 @@ def get_goal_desc(self, context): if "target_radius" in context.keys(): target_distance = context["target_distance"] target_radius = context["target_radius"] - return f"The distance to the goal is {target_distance} steps. Move within {target_radius} steps of the goal." + return f"The distance to the goal is {target_distance}m. Move within {target_radius} steps of the goal." else: target_distance = context["target_distance"] target_direction = context["target_direction"] - return f"Move {target_distance} steps {target_direction}." + return f"Move {target_distance}m {DIRECTION_NAMES[target_direction]}." diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index e8bb6d7c..fce1bbda 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -2,8 +2,9 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature +from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.envs.brax.brax_walker_goal_wrapper import directions class CARLBraxAnt(CARLBraxEnv): @@ -31,4 +32,10 @@ def get_context_features() -> dict[str, ContextFeature]: "viscosity": UniformFloatContextFeature( "viscosity", lower=0, upper=np.inf, default_value=0 ), + "target_distance": UniformFloatContextFeature( + "target_distance", lower=0, upper=np.inf, default_value=0 + ), + "target_direction": CategoricalContextFeature( + "target_direction", choices=directions, default_value=1 + ), } diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index b03dfd93..eb10ac06 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -14,6 +14,7 @@ from carl.context.selection import AbstractSelector from carl.envs.brax.wrappers import GymWrapper, VectorGymWrapper +from carl.envs.brax.brax_walker_goal_wrapper import BraxWalkerGoalWrapper, BraxLanguageWrapper from carl.envs.carl_env import CARLEnv from carl.utils.types import Contexts @@ -152,6 +153,7 @@ def __init__( obs_context_as_dict: bool = True, context_selector: AbstractSelector | type[AbstractSelector] | None = None, context_selector_kwargs: dict = None, + use_language_goals: bool = False, **kwargs, ) -> None: """ @@ -204,6 +206,15 @@ def __init__( dtype=np.float32, ) + if contexts is not None: + if "target_distance" in contexts[contexts.keys()[0]] or "target_direction" in contexts[contexts.keys()[0]]: + max_diff_dir = max([c["target_direction"]- contexts[contexts.keys()[0]]["target_direction"] for c in contexts.values()]) + max_diff_dist = max([c["target_distance"]- contexts[contexts.keys()[0]]["target_distance"] for c in contexts.values()]) + if max_diff_dir > 0.1 or max_diff_dist > 0.1: + env = BraxWalkerGoalWrapper(env) + if use_language_goals: + env = BraxLanguageWrapper(env, contexts) + super().__init__( env=env, contexts=contexts, diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index c1a69e46..79126f08 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -2,8 +2,9 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature +from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.envs.brax.brax_walker_goal_wrapper import directions class CARLBraxHalfcheetah(CARLBraxEnv): @@ -49,4 +50,10 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_ffoot": UniformFloatContextFeature( "mass_ffoot", lower=1e-6, upper=np.inf, default_value=0.8845188 ), + "target_distance": UniformFloatContextFeature( + "target_distance", lower=0, upper=np.inf, default_value=0 + ), + "target_direction": CategoricalContextFeature( + "target_direction", choices=directions, default_value=0 + ), } diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index be9c1699..4157d6f9 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -2,8 +2,9 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature +from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.envs.brax.brax_walker_goal_wrapper import directions class CARLBraxHopper(CARLBraxEnv): @@ -40,4 +41,10 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_foot": UniformFloatContextFeature( "mass_foot", lower=1e-6, upper=np.inf, default_value=5.3155746 ), + "target_distance": UniformFloatContextFeature( + "target_distance", lower=0, upper=np.inf, default_value=0 + ), + "target_direction": CategoricalContextFeature( + "target_direction", choices=directions, default_value=0 + ), } diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 27a57146..0950219d 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -2,8 +2,9 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature +from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.envs.brax.brax_walker_goal_wrapper import directions class CARLBraxHumanoid(CARLBraxEnv): @@ -67,4 +68,10 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_left_lower_arm": UniformFloatContextFeature( "mass_left_lower_arm", lower=1e-6, upper=np.inf, default_value=1.2295402 ), + "target_distance": UniformFloatContextFeature( + "target_distance", lower=0, upper=np.inf, default_value=0 + ), + "target_direction": CategoricalContextFeature( + "target_direction", choices=directions, default_value=0 + ), } diff --git a/carl/envs/brax/carl_pusher.py b/carl/envs/brax/carl_pusher.py index d7de1599..7a16a26c 100644 --- a/carl/envs/brax/carl_pusher.py +++ b/carl/envs/brax/carl_pusher.py @@ -76,4 +76,11 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_object": UniformFloatContextFeature( "mass_object", lower=1e-6, upper=np.inf, default_value=1.8325957e-03 ), + "goal_position_x": UniformFloatContextFeature("goal_position_x", lower=0, upper=np.inf, default_value=0.45), + "goal_position_y": UniformFloatContextFeature("goal_position_y", lower=0, upper=np.inf, default_value=0.05), + "goal_position_z": UniformFloatContextFeature("goal_position_z", lower=0, upper=np.inf, default_value=0.05), } + + def _update_context(self) -> None: + super()._update_context() + self.env._goal_pos = np.array([self.context["goal_position_x"], self.context["goal_position_y"], self.context["goal_position_z"]]) diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index 3aa66b89..1155bbb3 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -2,8 +2,9 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature +from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.envs.brax.brax_walker_goal_wrapper import directions class CARLBraxWalker2d(CARLBraxEnv): @@ -49,4 +50,10 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_foot_left": UniformFloatContextFeature( "mass_foot_left", lower=1e-6, upper=np.inf, default_value=3.1667254 ), + "target_distance": UniformFloatContextFeature( + "target_distance", lower=0, upper=np.inf, default_value=0 + ), + "target_direction": CategoricalContextFeature( + "target_direction", choices=directions, default_value=0 + ), } diff --git a/examples/brax_with_goals.ipynb b/examples/brax_with_goals.ipynb new file mode 100644 index 00000000..a2eada23 --- /dev/null +++ b/examples/brax_with_goals.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/theeimer/Documents/git/CARL/carl/envs/__init__.py:28: UserWarning: Module py4j not found. If you want to use these environments, please follow the installation guide.\n", + " warnings.warn(\n", + "/Users/theeimer/Documents/git/CARL/carl/envs/__init__.py:28: UserWarning: Module distance not found. If you want to use these environments, please follow the installation guide.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "from carl.context.context_space import NormalFloatContextFeature\n", + "from carl.context.sampler import ContextSampler\n", + "from carl.envs import CARLBraxAnt, CARLBraxPusher\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 8.82272212012359, 'target_direction': 11.564052345967665}, 1: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 10.75008841752559, 'target_direction': 10.200157208367225}, 2: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 9.648642791702303, 'target_direction': 10.77873798410574}, 3: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 9.696781148206442, 'target_direction': 12.04089319920146}, 4: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 10.210598501938373, 'target_direction': 11.667557990149968}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/theeimer/Documents/git/CARL/carl/envs/brax/carl_ant.py:26: RuntimeWarning: invalid value encountered in scalar divide\n", + " \"ang_damping\": UniformFloatContextFeature(\n" + ] + } + ], + "source": [ + "seed = 0\n", + "context_distributions = [NormalFloatContextFeature(\"target_distance\", mu=9.8, sigma=1), NormalFloatContextFeature(\"target_direction\", mu=9.8, sigma=1)]\n", + "context_sampler = ContextSampler(\n", + " context_distributions=context_distributions,\n", + " context_space=CARLBraxAnt.get_context_space(),\n", + " seed=seed,\n", + " )\n", + "contexts = context_sampler.sample_contexts(n_contexts=5)\n", + "print(contexts)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'mujoco._structs.MjOption' object has no attribute 'collision'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/theeimer/Documents/git/CARL/examples/brax_with_goals.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m env \u001b[39m=\u001b[39m CARLBraxAnt(contexts\u001b[39m=\u001b[39;49mcontexts, use_language_goals\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 2\u001b[0m env\u001b[39m.\u001b[39mreset()\n\u001b[1;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCurrent context ID: \u001b[39m\u001b[39m{\u001b[39;00menv\u001b[39m.\u001b[39mcontext_id\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/Documents/git/CARL/carl/envs/brax/carl_brax_env.py:193\u001b[0m, in \u001b[0;36mCARLBraxEnv.__init__\u001b[0;34m(self, env, batch_size, contexts, obs_context_features, obs_context_as_dict, context_selector, context_selector_kwargs, use_language_goals, **kwargs)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[39mif\u001b[39;00m env \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m bs \u001b[39m=\u001b[39m batch_size \u001b[39mif\u001b[39;00m batch_size \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m--> 193\u001b[0m env \u001b[39m=\u001b[39m brax\u001b[39m.\u001b[39;49menvs\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 194\u001b[0m env_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49menv_name, backend\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbackend, batch_size\u001b[39m=\u001b[39;49mbs\n\u001b[1;32m 195\u001b[0m )\n\u001b[1;32m 196\u001b[0m \u001b[39m# Brax uses gym instead of gymnasium\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[39mif\u001b[39;00m batch_size \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/envs/__init__.py:95\u001b[0m, in \u001b[0;36mcreate\u001b[0;34m(env_name, episode_length, action_repeat, auto_reset, batch_size, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 75\u001b[0m env_name: \u001b[39mstr\u001b[39m,\n\u001b[1;32m 76\u001b[0m episode_length: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39m1000\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 81\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Env:\n\u001b[1;32m 82\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Creates an environment from the registry.\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \n\u001b[1;32m 84\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[39m env: an environment\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 95\u001b[0m env \u001b[39m=\u001b[39m _envs[env_name](\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 97\u001b[0m \u001b[39mif\u001b[39;00m episode_length \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 98\u001b[0m env \u001b[39m=\u001b[39m training\u001b[39m.\u001b[39mEpisodeWrapper(env, episode_length, action_repeat)\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/envs/ant.py:161\u001b[0m, in \u001b[0;36mAnt.__init__\u001b[0;34m(self, ctrl_cost_weight, use_contact_forces, contact_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_z_range, contact_force_range, reset_noise_scale, exclude_current_positions_from_observation, backend, **kwargs)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 147\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 148\u001b[0m ctrl_cost_weight\u001b[39m=\u001b[39m\u001b[39m0.5\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 159\u001b[0m ):\n\u001b[1;32m 160\u001b[0m path \u001b[39m=\u001b[39m epath\u001b[39m.\u001b[39mresource_path(\u001b[39m'\u001b[39m\u001b[39mbrax\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m/\u001b[39m \u001b[39m'\u001b[39m\u001b[39menvs/assets/ant.xml\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m--> 161\u001b[0m sys \u001b[39m=\u001b[39m mjcf\u001b[39m.\u001b[39;49mload(path)\n\u001b[1;32m 163\u001b[0m n_frames \u001b[39m=\u001b[39m \u001b[39m5\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[39mif\u001b[39;00m backend \u001b[39min\u001b[39;00m [\u001b[39m'\u001b[39m\u001b[39mspring\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mpositional\u001b[39m\u001b[39m'\u001b[39m]:\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/io/mjcf.py:520\u001b[0m, in \u001b[0;36mload\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 517\u001b[0m xml \u001b[39m=\u001b[39m ElementTree\u001b[39m.\u001b[39mtostring(elem, encoding\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39municode\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 518\u001b[0m mj \u001b[39m=\u001b[39m mujoco\u001b[39m.\u001b[39mMjModel\u001b[39m.\u001b[39mfrom_xml_string(xml, assets\u001b[39m=\u001b[39massets)\n\u001b[0;32m--> 520\u001b[0m \u001b[39mreturn\u001b[39;00m load_model(mj)\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/io/mjcf.py:254\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(mj)\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[39mif\u001b[39;00m (mj\u001b[39m.\u001b[39mgeom_priority[\u001b[39m0\u001b[39m] \u001b[39m!=\u001b[39m mj\u001b[39m.\u001b[39mgeom_priority)\u001b[39m.\u001b[39many():\n\u001b[1;32m 253\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mgeom_priority parameter not supported.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 254\u001b[0m \u001b[39mif\u001b[39;00m mj\u001b[39m.\u001b[39;49mopt\u001b[39m.\u001b[39;49mcollision \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 255\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mPredefined collisions not supported.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 256\u001b[0m q_width \u001b[39m=\u001b[39m {\u001b[39m0\u001b[39m: \u001b[39m7\u001b[39m, \u001b[39m1\u001b[39m: \u001b[39m4\u001b[39m, \u001b[39m2\u001b[39m: \u001b[39m1\u001b[39m, \u001b[39m3\u001b[39m: \u001b[39m1\u001b[39m}\n", + "\u001b[0;31mAttributeError\u001b[0m: 'mujoco._structs.MjOption' object has no attribute 'collision'" + ] + } + ], + "source": [ + "env = CARLBraxAnt(contexts=contexts, use_language_goals=True)\n", + "env.reset()\n", + "print(f\"Current context ID: {env.context_id}\")\n", + "print(f\"Current context: {env.context}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "action = env.action_space.sample()\n", + "state, reward, terminated, truncated, info = env.step(action)\n", + "done = terminated or truncated\n", + "plt.imshow(env.render())\n", + "print(state)\n", + "print(reward)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "context_distributions = [NormalFloatContextFeature(\"goal_position_x\", mu=9.8, sigma=1), NormalFloatContextFeature(\"goal_position_y\", mu=9.8, sigma=1)]\n", + "context_sampler = ContextSampler(\n", + " context_distributions=context_distributions,\n", + " context_space=CARLBraxPusher.get_context_space(),\n", + " seed=seed,\n", + " )\n", + "contexts = context_sampler.sample_contexts(n_contexts=5)\n", + "print(contexts)\n", + "env = CARLBraxPusher(contexts)\n", + "env.reset()\n", + "print(f\"Current context ID: {env.context_id}\")\n", + "print(f\"Current context: {env.context}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "action = env.action_space.sample()\n", + "state, reward, terminated, truncated, info = env.step(action)\n", + "done = terminated or truncated\n", + "plt.imshow(env.render())\n", + "print(state)\n", + "print(reward)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "carl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d8eda811585f6dad9de6243e1e0a7b5633d79962 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 11:23:49 +0100 Subject: [PATCH 10/44] pre-commit --- carl/envs/brax/__init__.py | 1 - carl/envs/brax/brax_walker_goal_wrapper.py | 44 ++++++++++++---------- carl/envs/brax/carl_ant.py | 8 +++- carl/envs/brax/carl_brax_env.py | 20 ++++++++-- carl/envs/brax/carl_halfcheetah.py | 8 +++- carl/envs/brax/carl_hopper.py | 8 +++- carl/envs/brax/carl_humanoid.py | 8 +++- carl/envs/brax/carl_pusher.py | 19 +++++++--- carl/envs/brax/carl_walker2d.py | 8 +++- carl/envs/dmc/carl_dm_pointmass.py | 1 - carl/envs/dmc/dmc_tasks/pointmass.py | 39 ++++++++++++------- carl/envs/dmc/loader.py | 2 +- test/test_language_goals.py | 6 +-- 13 files changed, 114 insertions(+), 58 deletions(-) diff --git a/carl/envs/brax/__init__.py b/carl/envs/brax/__init__.py index 579f4a03..ed4b23ed 100644 --- a/carl/envs/brax/__init__.py +++ b/carl/envs/brax/__init__.py @@ -10,7 +10,6 @@ from carl.envs.brax.carl_reacher import CARLBraxReacher from carl.envs.brax.carl_walker2d import CARLBraxWalker2d - __all__ = [ "CARLBraxAnt", "CARLBraxHalfcheetah", diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index c2c53f48..7a50de60 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -29,23 +29,24 @@ } directions = [ - 1, # north - 3, # south - 2, # east - 4, # west - 12, - 32, - 14, - 34, - 112, - 332, - 114, - 334, - 212, - 232, - 414, - 434, - ] + 1, # north + 3, # south + 2, # east + 4, # west + 12, + 32, + 14, + 34, + 112, + 332, + 114, + 334, + 212, + 232, + 414, + 434, +] + class BraxWalkerGoalWrapper(gym.Wrapper): """Adds a positional goal to brax walker envs""" @@ -117,9 +118,11 @@ def reset(self, return_info=False): def step(self, action): state, _, done, info = self.env.step(action) indices = STATE_INDICES[self.env.__class__.__name__] - new_position = np.array(list(self.position)) + np.array( - [state[indices[0]], state[indices[1]]] - ) * self.env.env.sys.config.dt + new_position = ( + np.array(list(self.position)) + + np.array([state[indices[0]], state[indices[1]]]) + * self.env.env.sys.config.dt + ) current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) @@ -131,6 +134,7 @@ def step(self, action): info["success"] = 0 return state, direction_reward, done, info + class BraxLanguageWrapper(gym.Wrapper): """Translates the context features target distance and target radius into language""" diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index fce1bbda..7b7d0a39 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -2,9 +2,13 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature -from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.context.context_space import ( + CategoricalContextFeature, + ContextFeature, + UniformFloatContextFeature, +) from carl.envs.brax.brax_walker_goal_wrapper import directions +from carl.envs.brax.carl_brax_env import CARLBraxEnv class CARLBraxAnt(CARLBraxEnv): diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index eb10ac06..264af1a6 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -13,8 +13,11 @@ from jax import numpy as jp from carl.context.selection import AbstractSelector +from carl.envs.brax.brax_walker_goal_wrapper import ( + BraxLanguageWrapper, + BraxWalkerGoalWrapper, +) from carl.envs.brax.wrappers import GymWrapper, VectorGymWrapper -from carl.envs.brax.brax_walker_goal_wrapper import BraxWalkerGoalWrapper, BraxLanguageWrapper from carl.envs.carl_env import CARLEnv from carl.utils.types import Contexts @@ -207,9 +210,18 @@ def __init__( ) if contexts is not None: - if "target_distance" in contexts[contexts.keys()[0]] or "target_direction" in contexts[contexts.keys()[0]]: - max_diff_dir = max([c["target_direction"]- contexts[contexts.keys()[0]]["target_direction"] for c in contexts.values()]) - max_diff_dist = max([c["target_distance"]- contexts[contexts.keys()[0]]["target_distance"] for c in contexts.values()]) + if ( + "target_distance" in contexts[contexts.keys()[0]] + or "target_direction" in contexts[contexts.keys()[0]] + ): + base_dir = contexts[contexts.keys()[0]]["target_direction"] + base_dist = contexts[contexts.keys()[0]]["target_distance"] + max_diff_dir = max( + [c["target_direction"] - base_dir for c in contexts.values()] + ) + max_diff_dist = max( + [c["target_distance"] - base_dist for c in contexts.values()] + ) if max_diff_dir > 0.1 or max_diff_dist > 0.1: env = BraxWalkerGoalWrapper(env) if use_language_goals: diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index 79126f08..171015b1 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -2,9 +2,13 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature -from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.context.context_space import ( + CategoricalContextFeature, + ContextFeature, + UniformFloatContextFeature, +) from carl.envs.brax.brax_walker_goal_wrapper import directions +from carl.envs.brax.carl_brax_env import CARLBraxEnv class CARLBraxHalfcheetah(CARLBraxEnv): diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index 4157d6f9..138b580a 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -2,9 +2,13 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature -from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.context.context_space import ( + CategoricalContextFeature, + ContextFeature, + UniformFloatContextFeature, +) from carl.envs.brax.brax_walker_goal_wrapper import directions +from carl.envs.brax.carl_brax_env import CARLBraxEnv class CARLBraxHopper(CARLBraxEnv): diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 0950219d..1918b4bc 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -2,9 +2,13 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature -from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.context.context_space import ( + CategoricalContextFeature, + ContextFeature, + UniformFloatContextFeature, +) from carl.envs.brax.brax_walker_goal_wrapper import directions +from carl.envs.brax.carl_brax_env import CARLBraxEnv class CARLBraxHumanoid(CARLBraxEnv): diff --git a/carl/envs/brax/carl_pusher.py b/carl/envs/brax/carl_pusher.py index 7a16a26c..2498795a 100644 --- a/carl/envs/brax/carl_pusher.py +++ b/carl/envs/brax/carl_pusher.py @@ -76,11 +76,20 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_object": UniformFloatContextFeature( "mass_object", lower=1e-6, upper=np.inf, default_value=1.8325957e-03 ), - "goal_position_x": UniformFloatContextFeature("goal_position_x", lower=0, upper=np.inf, default_value=0.45), - "goal_position_y": UniformFloatContextFeature("goal_position_y", lower=0, upper=np.inf, default_value=0.05), - "goal_position_z": UniformFloatContextFeature("goal_position_z", lower=0, upper=np.inf, default_value=0.05), + "goal_position_x": UniformFloatContextFeature( + "goal_position_x", lower=0, upper=np.inf, default_value=0.45 + ), + "goal_position_y": UniformFloatContextFeature( + "goal_position_y", lower=0, upper=np.inf, default_value=0.05 + ), + "goal_position_z": UniformFloatContextFeature( + "goal_position_z", lower=0, upper=np.inf, default_value=0.05 + ), } - + def _update_context(self) -> None: super()._update_context() - self.env._goal_pos = np.array([self.context["goal_position_x"], self.context["goal_position_y"], self.context["goal_position_z"]]) + goal_x = self.context["goal_position_x"] + goal_y = self.context["goal_position_y"] + goal_z = self.context["goal_position_z"] + self.env._goal_pos = np.array([goal_x, goal_y, goal_z]) diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index 1155bbb3..7d4f92ca 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -2,9 +2,13 @@ import numpy as np -from carl.context.context_space import ContextFeature, UniformFloatContextFeature, CategoricalContextFeature -from carl.envs.brax.carl_brax_env import CARLBraxEnv +from carl.context.context_space import ( + CategoricalContextFeature, + ContextFeature, + UniformFloatContextFeature, +) from carl.envs.brax.brax_walker_goal_wrapper import directions +from carl.envs.brax.carl_brax_env import CARLBraxEnv class CARLBraxWalker2d(CARLBraxEnv): diff --git a/carl/envs/dmc/carl_dm_pointmass.py b/carl/envs/dmc/carl_dm_pointmass.py index 5e7769cf..b9d64c2c 100644 --- a/carl/envs/dmc/carl_dm_pointmass.py +++ b/carl/envs/dmc/carl_dm_pointmass.py @@ -8,7 +8,6 @@ from carl.utils.trial_logger import TrialLogger from carl.utils.types import Context, Contexts - DEFAULT_CONTEXT = { "gravity": -9.81, # Gravity is disabled via flag "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) diff --git a/carl/envs/dmc/dmc_tasks/pointmass.py b/carl/envs/dmc/dmc_tasks/pointmass.py index 2cea2c9a..83a29bc5 100644 --- a/carl/envs/dmc/dmc_tasks/pointmass.py +++ b/carl/envs/dmc/dmc_tasks/pointmass.py @@ -22,6 +22,7 @@ from multiprocessing.sharedctypes import Value import numpy as np +from dm_control.mujoco.wrapper import mjbindings from dm_control.rl import control # type: ignore from dm_control.suite.point_mass import ( # type: ignore _DEFAULT_TIME_LIMIT, @@ -30,7 +31,6 @@ PointMass, get_model_and_assets, ) -from dm_control.mujoco.wrapper import mjbindings from carl.envs.dmc.dmc_tasks.utils import adapt_context # type: ignore from carl.utils.types import Context @@ -44,12 +44,22 @@ def check_constraints( target_y, area_size, ) -> None: - if starting_x >= area_size/4 or starting_y >= area_size/4 or starting_x <= -area_size/4 or starting_y <= -area_size/4: + if ( + starting_x >= area_size / 4 + or starting_y >= area_size / 4 + or starting_x <= -area_size / 4 + or starting_y <= -area_size / 4 + ): raise ValueError( f"The starting points are located outside of the grid. Choose a value lower than {area_size/4}." ) - if target_x >= area_size/4 or target_y >= area_size/4 or target_x <= -area_size/4 or target_y <= -area_size/4: + if ( + target_x >= area_size / 4 + or target_y >= area_size / 4 + or target_x <= -area_size / 4 + or target_y <= -area_size / 4 + ): raise ValueError( f"The target points are located outside of the grid. Choose a value lower than {area_size/4}." ) @@ -127,20 +137,23 @@ def get_pointmass_xml_string( xml_string_bytes = xml_string.encode() return xml_string_bytes + def random_limited_quaternion(random, limit): - """Generates a random quaternion limited to the specified rotations.""" - axis = random.randn(3) - axis /= np.linalg.norm(axis) - angle = random.rand() * limit + """Generates a random quaternion limited to the specified rotations.""" + axis = random.randn(3) + axis /= np.linalg.norm(axis) + angle = random.rand() * limit - quaternion = np.zeros(4) - mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle) + quaternion = np.zeros(4) + mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle) + + return quaternion - return quaternion class ContextualPointMass(PointMass): starting_x: float = 0.2 starting_y: float = 0.2 + def initialize_episode(self, physics): """Sets the state of the environment at the start of each episode. @@ -176,16 +189,16 @@ def randomize_limited_and_rotational_joints(self, physics, random=None): qpos = physics.named.data.qpos for joint_id in range(physics.model.njnt): - joint_name = physics.model.id2name(joint_id, 'joint') + joint_name = physics.model.id2name(joint_id, "joint") joint_type = physics.model.jnt_type[joint_id] is_limited = physics.model.jnt_limited[joint_id] range_min, range_max = physics.model.jnt_range[joint_id] if is_limited: if joint_type == hinge or joint_type == slide: - if 'root_x' in joint_name: + if "root_x" in joint_name: qpos[joint_name] = self.starting_x - elif 'root_y' in joint_name: + elif "root_y" in joint_name: qpos[joint_name] = self.starting_y else: qpos[joint_name] = random.uniform(range_min, range_max) diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 3fc50404..709ab568 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -8,9 +8,9 @@ from carl.envs.dmc.dmc_tasks import ( # type: ignore [import] # noqa: F401 finger, fish, + pointmass, quadruped, walker, - pointmass, ) from carl.utils.types import Context diff --git a/test/test_language_goals.py b/test/test_language_goals.py index d568f0a7..c6afb379 100644 --- a/test/test_language_goals.py +++ b/test/test_language_goals.py @@ -1,11 +1,11 @@ import unittest from carl.envs.brax import ( + BraxLanguageWrapper, + BraxWalkerGoalWrapper, CARLAnt, - CARLHalfcheetah, CARLFetch, - BraxWalkerGoalWrapper, - BraxLanguageWrapper, + CARLHalfcheetah, sample_walker_language_goals, ) From 9aa51f43eb0d78b851ee36bca64adac57097152d Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 11:29:21 +0100 Subject: [PATCH 11/44] fix: test workflows --- .github/workflows/tests.yaml | 43 +++++------------------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 26a20757..3a0df0f3 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -21,10 +21,6 @@ on: - main - development - schedule: - # Every day at 7AM UTC - - cron: '0 07 * * *' - env: # Arguments used for pytest @@ -47,32 +43,17 @@ jobs: strategy: fail-fast: false matrix: - os: [windows-latest, macos-latest, ubuntu-latest] - python-version: ['3.9', '3.10'] - kind: ['conda', 'source', 'dist'] - - exclude: - # Exclude all configurations *-*-dist, but include one later in `include` - - kind: 'dist' - - # Exclude windows as bash commands wont work in windows runner - - os: windows-latest - - # Exclude macos as there are permission errors using conda as we do - - os: macos-latest + os: [ubuntu-latest] + python-version: ['3.9', '3.10', '3.11'] + kind: ['conda'] include: # Add the tag code-cov to ubuntu-3.7-source - os: ubuntu-latest python-version: 3.9 - kind: 'source' + kind: 'conda' code-cov: true - # Include one config with dist, ubuntu-3.7-dist - - os: ubuntu-latest - python-version: 3.9 - kind: 'dist' - steps: - name: Checkout @@ -91,21 +72,7 @@ jobs: # Miniconda is available in $CONDA env var $CONDA/bin/conda create -n testenv --yes pip wheel gxx_linux-64 gcc_linux-64 python=${{ matrix.python-version }} $CONDA/envs/testenv/bin/python3 -m pip install --upgrade pip - $CONDA/envs/testenv/bin/pip3 install -e ".[dev,box2d,brax,dm_control,mario]" - - - name: Source install - if: matrix.kind == 'source' - run: | - python -m pip install --upgrade pip - pip install -e ".[dev,box2d,brax,dm_control,mario]" - - - name: Dist install - if: matrix.kind == 'dist' - run: | - python -m pip install --upgrade pip - python setup.py sdist - last_dist=$(ls -t dist/carl-*.tar.gz | head -n 1) - pip install $last_dist[dev,box2d,brax,dm_control,mario] + $CONDA/envs/testenv/bin/pip3 install -e .[dev,dm_control] - name: Tests timeout-minutes: 60 From 86358011b3d42e2b4ce51c0d0630ee23cf12805d Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 11:47:16 +0100 Subject: [PATCH 12/44] mario dependencies in test workflow --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 3a0df0f3..a3ee330f 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -72,7 +72,7 @@ jobs: # Miniconda is available in $CONDA env var $CONDA/bin/conda create -n testenv --yes pip wheel gxx_linux-64 gcc_linux-64 python=${{ matrix.python-version }} $CONDA/envs/testenv/bin/python3 -m pip install --upgrade pip - $CONDA/envs/testenv/bin/pip3 install -e .[dev,dm_control] + $CONDA/envs/testenv/bin/pip3 install -e .[dev,dm_control,mario,brax] - name: Tests timeout-minutes: 60 From c214c3cbf682543a3a9e22961100cf6aa0da0e66 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:28:45 +0100 Subject: [PATCH 13/44] add gymnasium registration --- carl/__init__.py | 55 +++++++++++++++++++ carl/envs/__init__.py | 19 ++++++- carl/envs/brax/carl_ant.py | 1 + carl/envs/brax/carl_halfcheetah.py | 1 + carl/envs/brax/carl_hopper.py | 1 + carl/envs/brax/carl_humanoid.py | 1 + carl/envs/brax/carl_humanoidstandup.py | 1 + .../brax/carl_inverted_double_pendulum.py | 1 + carl/envs/brax/carl_inverted_pendulum.py | 1 + carl/envs/brax/carl_pusher.py | 1 + carl/envs/brax/carl_reacher.py | 1 + carl/envs/brax/carl_walker2d.py | 1 + carl/envs/dmc/carl_dm_finger.py | 1 + carl/envs/dmc/carl_dm_fish.py | 1 + carl/envs/dmc/carl_dm_quadruped.py | 1 + carl/envs/dmc/carl_dm_walker.py | 1 + .../gymnasium/box2d/carl_bipedal_walker.py | 1 + carl/envs/gymnasium/box2d/carl_lunarlander.py | 1 + .../gymnasium/box2d/carl_vehicle_racing.py | 1 + .../gymnasium/classic_control/carl_acrobot.py | 1 + .../classic_control/carl_cartpole.py | 1 + .../classic_control/carl_mountaincar.py | 1 + .../carl_mountaincarcontinuous.py | 1 + .../classic_control/carl_pendulum.py | 1 + carl/envs/mario/carl_mario.py | 5 ++ test/test_gymnasium_envs.py | 22 +++++++- 26 files changed, 119 insertions(+), 4 deletions(-) diff --git a/carl/__init__.py b/carl/__init__.py index f1391313..53595d97 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -4,6 +4,12 @@ import datetime +import importlib.util as iutil +import warnings + +from gymnasium.envs.registration import register + +from carl import envs name = "CARL" package_name = "carl-bench" @@ -20,3 +26,52 @@ Copyright {datetime.date.today().strftime('%Y')}, AutoML.org Freiburg-Hannover """ version = __version__ + + +for e in envs.gymnasium.classic_control.__all__: + register( + id=f"carl/{e}-v0", + entry_point=f"carl.envs.gymnasium.classic_control:{e}", + ) + + +def check_spec(spec_name: str) -> bool: + """Check if the spec is installed + + Parameters + ---------- + spec_name : str + Name of package that is necessary for the environment suite. + + Returns + ------- + bool + Whether the spec was found. + """ + spec = iutil.find_spec(spec_name) + found = spec is not None + if not found: + with warnings.catch_warnings(): + warnings.simplefilter("once") + warnings.warn( + f"""Module {spec_name} not found. If you want to use these environments, + please follow the installation guide.""" + ) + return found + + +# Environment loading +found = check_spec("Box2D") +if found: + for e in envs.gymnasium.box2d.__all__: + register( + id=f"carl/{e}-v0", + entry_point=f"carl.envs.gymnasium.box2d:{e}", + ) + +found = check_spec("py4j") +if found: + register( + id="carl/CARLMario-v0", + entry_point="carl.envs.gymnasium:CARLMarioEnv", + ) diff --git a/carl/envs/__init__.py b/carl/envs/__init__.py index 7d34ffc1..b03f906c 100644 --- a/carl/envs/__init__.py +++ b/carl/envs/__init__.py @@ -4,8 +4,11 @@ import warnings # Classic control is in gym and thus necessary for the base version to run +from carl import envs from carl.envs.gymnasium import * +__all__ = envs.gymnasium.__all__ + def check_spec(spec_name: str) -> bool: """Check if the spec is installed @@ -36,18 +39,28 @@ def check_spec(spec_name: str) -> bool: if found: from carl.envs.gymnasium.box2d import * + __all__ += envs.gymnasium.box2d.__all__ + found = check_spec("brax") if found: from carl.envs.brax import * + __all__ += envs.brax.__all__ + found = check_spec("py4j") if found: from carl.envs.mario import * + __all__ += envs.mario.__all__ + found = check_spec("dm_control") if found: from carl.envs.dmc import * -# found = check_spec("distance") -# if found: -# from carl.envs.rna import * + __all__ += envs.dmc.__all__ + +found = check_spec("distance") +if found: + from carl.envs.rna import * + + __all__ += envs.rna.__all__ diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index e8bb6d7c..5f80816f 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -9,6 +9,7 @@ class CARLBraxAnt(CARLBraxEnv): env_name: str = "ant" asset_path: str = "envs/assets/ant.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index c1a69e46..a3c0d8fe 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -9,6 +9,7 @@ class CARLBraxHalfcheetah(CARLBraxEnv): env_name: str = "halfcheetah" asset_path: str = "envs/assets/half_cheetah.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index be9c1699..4b877eb6 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -9,6 +9,7 @@ class CARLBraxHopper(CARLBraxEnv): env_name: str = "hopper" asset_path: str = "envs/assets/hopper.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 27a57146..f9c1353a 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -9,6 +9,7 @@ class CARLBraxHumanoid(CARLBraxEnv): env_name: str = "humanoid" asset_path: str = "envs/assets/humanoid.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_humanoidstandup.py b/carl/envs/brax/carl_humanoidstandup.py index 7edb6ef6..1d923bbd 100644 --- a/carl/envs/brax/carl_humanoidstandup.py +++ b/carl/envs/brax/carl_humanoidstandup.py @@ -9,6 +9,7 @@ class CARLBraxHumanoidStandup(CARLBraxEnv): env_name: str = "humanoidstandup" asset_path: str = "envs/assets/humanoidstandup.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_inverted_double_pendulum.py b/carl/envs/brax/carl_inverted_double_pendulum.py index 07976e49..ea467ae0 100644 --- a/carl/envs/brax/carl_inverted_double_pendulum.py +++ b/carl/envs/brax/carl_inverted_double_pendulum.py @@ -9,6 +9,7 @@ class CARLBraxInvertedDoublePendulum(CARLBraxEnv): env_name: str = "inverted_double_pendulum" asset_path: str = "envs/assets/inverted_double_pendulum.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_inverted_pendulum.py b/carl/envs/brax/carl_inverted_pendulum.py index 280d81f5..831330c6 100644 --- a/carl/envs/brax/carl_inverted_pendulum.py +++ b/carl/envs/brax/carl_inverted_pendulum.py @@ -9,6 +9,7 @@ class CARLBraxInvertedPendulum(CARLBraxEnv): env_name: str = "inverted_pendulum" asset_path: str = "envs/assets/inverted_pendulum.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_pusher.py b/carl/envs/brax/carl_pusher.py index d7de1599..e2055efa 100644 --- a/carl/envs/brax/carl_pusher.py +++ b/carl/envs/brax/carl_pusher.py @@ -9,6 +9,7 @@ class CARLBraxPusher(CARLBraxEnv): env_name: str = "pusher" asset_path: str = "envs/assets/pusher.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_reacher.py b/carl/envs/brax/carl_reacher.py index a6d75b62..5d35d68d 100644 --- a/carl/envs/brax/carl_reacher.py +++ b/carl/envs/brax/carl_reacher.py @@ -9,6 +9,7 @@ class CARLBraxReacher(CARLBraxEnv): env_name: str = "reacher" asset_path: str = "envs/assets/reacher.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index 3aa66b89..6fc7953f 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -9,6 +9,7 @@ class CARLBraxWalker2d(CARLBraxEnv): env_name: str = "walker2d" asset_path: str = "envs/assets/walker2d.xml" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/dmc/carl_dm_finger.py b/carl/envs/dmc/carl_dm_finger.py index b2604fec..88be38e2 100644 --- a/carl/envs/dmc/carl_dm_finger.py +++ b/carl/envs/dmc/carl_dm_finger.py @@ -7,6 +7,7 @@ class CARLDmcFingerEnv(CARLDmcEnv): domain = "finger" task = "spin_context" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 619364a8..31e077c6 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -7,6 +7,7 @@ class CARLDmcFishEnv(CARLDmcEnv): domain = "fish" task = "swim_context" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 8d1fbdd2..3e00d5f8 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -7,6 +7,7 @@ class CARLDmcQuadrupedEnv(CARLDmcEnv): domain = "quadruped" task = "walk_context" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 97b6d9b1..b16cdd70 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -7,6 +7,7 @@ class CARLDmcWalkerEnv(CARLDmcEnv): domain = "walker" task = "walk_context" + metadata = {"render_modes": []} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/box2d/carl_bipedal_walker.py b/carl/envs/gymnasium/box2d/carl_bipedal_walker.py index 2ea9f4dc..a36cd62c 100644 --- a/carl/envs/gymnasium/box2d/carl_bipedal_walker.py +++ b/carl/envs/gymnasium/box2d/carl_bipedal_walker.py @@ -14,6 +14,7 @@ class CARLBipedalWalker(CARLGymnasiumEnv): env_name: str = "BipedalWalker-v3" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/box2d/carl_lunarlander.py b/carl/envs/gymnasium/box2d/carl_lunarlander.py index 9f3c59f5..52dfcfda 100644 --- a/carl/envs/gymnasium/box2d/carl_lunarlander.py +++ b/carl/envs/gymnasium/box2d/carl_lunarlander.py @@ -14,6 +14,7 @@ class CARLLunarLander(CARLGymnasiumEnv): env_name: str = "LunarLander-v2" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/box2d/carl_vehicle_racing.py b/carl/envs/gymnasium/box2d/carl_vehicle_racing.py index dcf61d75..0db60a60 100644 --- a/carl/envs/gymnasium/box2d/carl_vehicle_racing.py +++ b/carl/envs/gymnasium/box2d/carl_vehicle_racing.py @@ -209,6 +209,7 @@ def render_if_min(value, points, color): class CARLVehicleRacing(CARLGymnasiumEnv): env_name: str = "CustomCarRacing-v2" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/classic_control/carl_acrobot.py b/carl/envs/gymnasium/classic_control/carl_acrobot.py index d81a7534..01d66b9a 100644 --- a/carl/envs/gymnasium/classic_control/carl_acrobot.py +++ b/carl/envs/gymnasium/classic_control/carl_acrobot.py @@ -10,6 +10,7 @@ class CARLAcrobot(CARLGymnasiumEnv): env_name: str = "Acrobot-v1" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/classic_control/carl_cartpole.py b/carl/envs/gymnasium/classic_control/carl_cartpole.py index ff8c7a31..93e44b9f 100644 --- a/carl/envs/gymnasium/classic_control/carl_cartpole.py +++ b/carl/envs/gymnasium/classic_control/carl_cartpole.py @@ -10,6 +10,7 @@ class CARLCartPole(CARLGymnasiumEnv): env_name: str = "CartPole-v1" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/classic_control/carl_mountaincar.py b/carl/envs/gymnasium/classic_control/carl_mountaincar.py index dcde2e77..2ea59621 100644 --- a/carl/envs/gymnasium/classic_control/carl_mountaincar.py +++ b/carl/envs/gymnasium/classic_control/carl_mountaincar.py @@ -10,6 +10,7 @@ class CARLMountainCar(CARLGymnasiumEnv): env_name: str = "MountainCar-v0" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/classic_control/carl_mountaincarcontinuous.py b/carl/envs/gymnasium/classic_control/carl_mountaincarcontinuous.py index 155823b2..3aeab6d9 100644 --- a/carl/envs/gymnasium/classic_control/carl_mountaincarcontinuous.py +++ b/carl/envs/gymnasium/classic_control/carl_mountaincarcontinuous.py @@ -10,6 +10,7 @@ class CARLMountainCarContinuous(CARLGymnasiumEnv): env_name: str = "MountainCarContinuous-v0" + metadata = {"render.modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/gymnasium/classic_control/carl_pendulum.py b/carl/envs/gymnasium/classic_control/carl_pendulum.py index 4148886a..10226dd8 100644 --- a/carl/envs/gymnasium/classic_control/carl_pendulum.py +++ b/carl/envs/gymnasium/classic_control/carl_pendulum.py @@ -10,6 +10,7 @@ class CARLPendulum(CARLGymnasiumEnv): env_name: str = "Pendulum-v1" + metadata = {"render_modes": ["human", "rgb_array"]} @staticmethod def get_context_features() -> dict[str, ContextFeature]: diff --git a/carl/envs/mario/carl_mario.py b/carl/envs/mario/carl_mario.py index 75fae6ca..2b4c5d09 100644 --- a/carl/envs/mario/carl_mario.py +++ b/carl/envs/mario/carl_mario.py @@ -22,6 +22,11 @@ class CARLMarioEnv(CARLEnv): + metadata = { + "render_modes": ["rgb_array", "tiny_rgb_array"], + "render_fps": 24, + } + def __init__( self, env: MarioEnv = None, diff --git a/test/test_gymnasium_envs.py b/test/test_gymnasium_envs.py index 5be182c2..5e4d0c66 100644 --- a/test/test_gymnasium_envs.py +++ b/test/test_gymnasium_envs.py @@ -1,6 +1,9 @@ import inspect import unittest +import gymnasium as gym + +import carl import carl.envs.gymnasium @@ -21,5 +24,22 @@ def test_envs(self): raise e +class TestGymnasiumRegistration(unittest.TestCase): + def test_registration(self): + registered_envs = gym.envs.registration.registry.keys() + for e in carl.envs.__all__: + if "RNA" not in e and "Brax" not in e and "Dmc" not in e: + env_name = f"carl/{e}-v0" + self.assertTrue(env_name in registered_envs) + + def test_make(self): + for e in carl.envs.__all__: + if "RNA" not in e and "Brax" not in e and "Dmc" not in e: + print(e) + env_name = f"carl/{e}-v0" + env = gym.make(env_name) + self.assertTrue(isinstance(env, gym.Env)) + + if __name__ == "__main__": - TestGymnasiumEnvs().test_envs() + unittest.main() From 2e62f6855affee7e9c40a92572e5f4156e7e8acf Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:39:30 +0100 Subject: [PATCH 14/44] fix: avoid gym registration in installation --- carl/__init__.py | 87 +++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/carl/__init__.py b/carl/__init__.py index 53595d97..02ea9cb3 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -7,10 +7,6 @@ import importlib.util as iutil import warnings -from gymnasium.envs.registration import register - -from carl import envs - name = "CARL" package_name = "carl-bench" author = __author__ @@ -27,51 +23,58 @@ """ version = __version__ +try: + from gymnasium.envs.registration import register -for e in envs.gymnasium.classic_control.__all__: - register( - id=f"carl/{e}-v0", - entry_point=f"carl.envs.gymnasium.classic_control:{e}", - ) + from carl import envs + + for e in envs.gymnasium.classic_control.__all__: + register( + id=f"carl/{e}-v0", + entry_point=f"carl.envs.gymnasium.classic_control:{e}", + ) + def check_spec(spec_name: str) -> bool: + """Check if the spec is installed -def check_spec(spec_name: str) -> bool: - """Check if the spec is installed + Parameters + ---------- + spec_name : str + Name of package that is necessary for the environment suite. - Parameters - ---------- - spec_name : str - Name of package that is necessary for the environment suite. + Returns + ------- + bool + Whether the spec was found. + """ + spec = iutil.find_spec(spec_name) + found = spec is not None + if not found: + with warnings.catch_warnings(): + warnings.simplefilter("once") + warnings.warn( + f"""Module {spec_name} not found. If you want to use these environments, + please follow the installation guide.""" + ) + return found - Returns - ------- - bool - Whether the spec was found. - """ - spec = iutil.find_spec(spec_name) - found = spec is not None - if not found: - with warnings.catch_warnings(): - warnings.simplefilter("once") - warnings.warn( - f"""Module {spec_name} not found. If you want to use these environments, - please follow the installation guide.""" + # Environment loading + found = check_spec("Box2D") + if found: + for e in envs.gymnasium.box2d.__all__: + register( + id=f"carl/{e}-v0", + entry_point=f"carl.envs.gymnasium.box2d:{e}", ) - return found - -# Environment loading -found = check_spec("Box2D") -if found: - for e in envs.gymnasium.box2d.__all__: + found = check_spec("py4j") + if found: register( - id=f"carl/{e}-v0", - entry_point=f"carl.envs.gymnasium.box2d:{e}", + id="carl/CARLMario-v0", + entry_point="carl.envs.gymnasium:CARLMarioEnv", ) - -found = check_spec("py4j") -if found: - register( - id="carl/CARLMario-v0", - entry_point="carl.envs.gymnasium:CARLMarioEnv", +except: + print( + """Gym registration failed - this is normal during installation. + After that, please check that gymnasium is installed correctly.""" ) From fac01d2af5fe8b381de69a24b365733969b2e390 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:44:48 +0100 Subject: [PATCH 15/44] add coverage makro to makefile --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 995451d5..ee8e8b60 100644 --- a/Makefile +++ b/Makefile @@ -73,6 +73,9 @@ format: format-black format-isort test: $(PYTEST) test +test-cov: + $(PYTEST) test --cov=carl --cov-report=html:test_coverage_carl + clean-doc: $(MAKE) -C ${DOCDIR} clean From 18a8ca70162ad599343e11d55193dd5ae1c210c1 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:45:39 +0100 Subject: [PATCH 16/44] remove superflous tool configs --- .codecov.yml | 46 ---------------------------------------------- .readthedocs.yaml | 22 ---------------------- 2 files changed, 68 deletions(-) delete mode 100644 .codecov.yml delete mode 100644 .readthedocs.yaml diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100644 index a92352ef..00000000 --- a/.codecov.yml +++ /dev/null @@ -1,46 +0,0 @@ -#see https://github.com/codecov/support/wiki/Codecov-Yaml -codecov: - require_ci_to_pass: yes - -coverage: - - # 2 = xx.xx%, 0 = xx% - precision: 2 - - # https://docs.codecov.com/docs/commit-status - status: - - # We want our total main project to always remain above 87% coverage, a - # drop of 0.20% is allowed. It should fail if coverage couldn't be uploaded - # of the CI fails otherwise - project: - default: - target: 10% - threshold: 0.20% - if_not_found: failure - if_ci_failed: error - - # The code changed by a PR should have 90% coverage. This is different from the - # overall number shown above. - # This encourages small PR's as they are easier to test. - patch: - default: - target: 10% - if_not_found: failure - if_ci_failed: failure - -# We upload additional information on branching with pytest-cov `--cov-branch` -# This information can be used by codecov.com to increase analysis of code -parsers: - gcov: - branch_detection: - conditional: true - loop: true - method: true - macro: false - - -comment: - layout: diff, reach - behavior: default - require_changes: false diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 98fba55a..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# use version 2, which is now recommended -version: 2 - -build: - os: ubuntu-20.04 - tools: - python: "3.9" - -# Build from the docs/ directory with Sphinx -sphinx: - configuration: docs/conf.py - -# build all -formats: all - -# Explicitly set the version of Python and its requirements -python: - install: - - method: pip - path: . - extra_requirements: - - docs From 6b7b00cd0b7eff9881bebef148f4e39c6f076568 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:47:40 +0100 Subject: [PATCH 17/44] fix gymnasium tests worflow --- .github/workflows/tests.yaml | 2 +- carl/__init__.py | 2 +- test/test_gymnasium_envs.py | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a3ee330f..c63da07a 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -72,7 +72,7 @@ jobs: # Miniconda is available in $CONDA env var $CONDA/bin/conda create -n testenv --yes pip wheel gxx_linux-64 gcc_linux-64 python=${{ matrix.python-version }} $CONDA/envs/testenv/bin/python3 -m pip install --upgrade pip - $CONDA/envs/testenv/bin/pip3 install -e .[dev,dm_control,mario,brax] + $CONDA/envs/testenv/bin/pip3 install -e .[dev,dm_control,mario,brax,box2d] - name: Tests timeout-minutes: 60 diff --git a/carl/__init__.py b/carl/__init__.py index 02ea9cb3..2961528e 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -70,7 +70,7 @@ def check_spec(spec_name: str) -> bool: found = check_spec("py4j") if found: register( - id="carl/CARLMario-v0", + id="carl/CARLMarioEnv-v0", entry_point="carl.envs.gymnasium:CARLMarioEnv", ) except: diff --git a/test/test_gymnasium_envs.py b/test/test_gymnasium_envs.py index 5e4d0c66..e2c6ddd0 100644 --- a/test/test_gymnasium_envs.py +++ b/test/test_gymnasium_envs.py @@ -15,7 +15,6 @@ def test_envs(self): if inspect.isclass(env_obj) and "CARL" in env_name: try: env_obj.get_context_features() - env = env_obj() env._progress_instance() env._update_context() @@ -35,7 +34,6 @@ def test_registration(self): def test_make(self): for e in carl.envs.__all__: if "RNA" not in e and "Brax" not in e and "Dmc" not in e: - print(e) env_name = f"carl/{e}-v0" env = gym.make(env_name) self.assertTrue(isinstance(env, gym.Env)) From d96c7885ba5f46244ea55b05cdace25916298619 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:53:10 +0100 Subject: [PATCH 18/44] disable code coverage bot(?) --- .github/workflows/tests.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index c63da07a..4649c1ef 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -90,10 +90,3 @@ jobs: else $PYTHON -m pytest ${{ env.pytest-args }} --ignore=test/local_only test fi - - - name: Upload coverage - if: matrix.code-cov && always() - uses: codecov/codecov-action@v2 - with: - fail_ci_if_error: true - verbose: true From add396d07ec63974333cbd3ca78dcc9fcb35054a Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:54:43 +0100 Subject: [PATCH 19/44] fix error in mario registry --- carl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/carl/__init__.py b/carl/__init__.py index 2961528e..b2d3d700 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -71,7 +71,7 @@ def check_spec(spec_name: str) -> bool: if found: register( id="carl/CARLMarioEnv-v0", - entry_point="carl.envs.gymnasium:CARLMarioEnv", + entry_point="carl.envs.mario:CARLMarioEnv", ) except: print( From 15e0314bb38bbd95a74879db435af1997964ff07 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 12:56:44 +0100 Subject: [PATCH 20/44] Add templates for PRs/issues --- .github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md | 37 ++++++++++++++++++ .../PULL_REQUEST_TEMPLATE.md | 39 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md create mode 100644 .github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..b79752ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md @@ -0,0 +1,37 @@ +--- +name: Issue Template +about: General template issues +labels: + +--- + +* {{ cookiecutter.project_name }} version: +* Python version: +* Operating System: + + + + +#### Description + + +#### Steps/Code to Reproduce + + +#### Expected Results + + +#### Actual Results + + +#### Additional Info + +- Did you try upgrading to the most current version? yes/no +- Are you using a supported operating system (version)? yes/no +- How did you install this package (e.g. GitHub, pip, etc.)? + + \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..6556baad --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,39 @@ + + +#### Reference Issues/PRs + + +#### What does this implement/fix? Explain your changes. + + + +#### Checklist + +- Are the tests passing locally? yes/no +- Is the pre-commit passing locally? yes/no +- Are all new features documented in code and docs? yes/no +- Are all examples still running? yes/no +- Are the requirements up to date? yes/no +- Did you add yourself to the contributors in the authors file? yes/no + +#### Any other comments? + + \ No newline at end of file From 6354fb0bc8535f23aa4ea330e367ece321cd995d Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 13:03:05 +0100 Subject: [PATCH 21/44] change to cff for citation --- CITATION.bib | 14 -------------- CITATION.cff | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) delete mode 100644 CITATION.bib create mode 100644 CITATION.cff diff --git a/CITATION.bib b/CITATION.bib deleted file mode 100644 index f6a2b599..00000000 --- a/CITATION.bib +++ /dev/null @@ -1,14 +0,0 @@ -@inproceedings { BenEim2023a, - author = {Carolin Benjamins and - Theresa Eimer and - Frederik Schubert and - Aditya Mohan and - Sebastian Döhler and - André Biedenkapp and - Bodo Rosenhahn and - Frank Hutter and - Marius Lindauer}, - title = {Contextualize Me - The Case for Context in Reinforcement Learning}, - journal = {Transactions on Machine Learning Research}, - year = {2023}, -} diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..825e9c10 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,39 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite our paper:" + +url: "https://automl.github.io/CARL/" +repository-code: "https://github.com/automl/CARL" + +authors: + - family-names: "Benjamins" + given-names: "Carolin" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Eimer" + given-names: "Theresa" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Schubert" + given-names: "Frederik" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Mohan" + given-names: "Aditya" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Döhler" + given-names: "Sebastian" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Biedenkapp" + given-names: "André" + affiliation: "Albert-Ludwigs University Freiburg, Germany" + - family-names: "Rosenhahn" + given-names: "Bodo" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Hutter" + given-names: "Frank" + affiliation: "Albert-Ludwigs University Freiburg, Germany" + - family-names: "Lindauer" + given-names: "Marius" + affiliation: "Leibniz University Hannover, Germany" + +type: "article" +title: "Contextualize Me - The Case for Context in Reinforcement Learning" +year: 2023 +journal: "Transactions on Machine Learning Research" \ No newline at end of file From 0f87044266a72bc34c11637a84cca2b865c737a9 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 13:14:32 +0100 Subject: [PATCH 22/44] fix citation file --- CITATION.cff | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 825e9c10..509cd5f7 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -3,6 +3,7 @@ message: "If you use this software, please cite our paper:" url: "https://automl.github.io/CARL/" repository-code: "https://github.com/automl/CARL" +title: "CARL - Context Adaptive Reinforcement Learning" authors: - family-names: "Benjamins" @@ -33,7 +34,36 @@ authors: given-names: "Marius" affiliation: "Leibniz University Hannover, Germany" -type: "article" -title: "Contextualize Me - The Case for Context in Reinforcement Learning" -year: 2023 -journal: "Transactions on Machine Learning Research" \ No newline at end of file +preferred-citation: + type: "article" + title: "Contextualize Me - The Case for Context in Reinforcement Learning" + year: 2023 + journal: "Transactions on Machine Learning Research" + authors: + - family-names: "Benjamins" + given-names: "Carolin" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Eimer" + given-names: "Theresa" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Schubert" + given-names: "Frederik" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Mohan" + given-names: "Aditya" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Döhler" + given-names: "Sebastian" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Biedenkapp" + given-names: "André" + affiliation: "Albert-Ludwigs University Freiburg, Germany" + - family-names: "Rosenhahn" + given-names: "Bodo" + affiliation: "Leibniz University Hannover, Germany" + - family-names: "Hutter" + given-names: "Frank" + affiliation: "Albert-Ludwigs University Freiburg, Germany" + - family-names: "Lindauer" + given-names: "Marius" + affiliation: "Leibniz University Hannover, Germany" \ No newline at end of file From 1a72211390b84d20571dba2dd626f87f92facaab Mon Sep 17 00:00:00 2001 From: amsks Date: Mon, 11 Dec 2023 12:08:19 +0100 Subject: [PATCH 23/44] enhancement: tests added fr Brax, Box2D, Context Bounds --- test/test_box2d_envs.py | 26 ++++++++++++++++ test/test_brax_env.py | 27 ++++++++++++++++ test/test_context_bounds.py | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 test/test_box2d_envs.py create mode 100644 test/test_brax_env.py create mode 100644 test/test_context_bounds.py diff --git a/test/test_box2d_envs.py b/test/test_box2d_envs.py new file mode 100644 index 00000000..2cbabe51 --- /dev/null +++ b/test/test_box2d_envs.py @@ -0,0 +1,26 @@ +import inspect +import unittest + +import carl.envs.gymnasium + + +class TestBox2DEnvs(unittest.TestCase): + def test_envs(self): + envs = inspect.getmembers(carl.envs.gymnasium.box2d) + + for env_name, env_obj in envs: + if inspect.isclass(env_obj) and "CARL" in env_name: + try: + env_obj.get_context_features() + + env = env_obj() + env._progress_instance() + env._update_context() + env.reset() + except Exception as e: + print(f"Cannot instantiate {env_name} environment.") + raise e + + +if __name__ == "__main__": + TestBox2DEnvs().test_envs() diff --git a/test/test_brax_env.py b/test/test_brax_env.py new file mode 100644 index 00000000..16f36de4 --- /dev/null +++ b/test/test_brax_env.py @@ -0,0 +1,27 @@ +import inspect +import unittest + +import carl.envs.gymnasium + + +class TestBraxEnvs(unittest.TestCase): + def test_envs(self): + envs = inspect.getmembers(carl.envs.brax) + + for env_name, env_obj in envs: + if inspect.isclass(env_obj) and "CARL" in env_name: + try: + env_obj.get_context_features() + + env = env_obj() + env._progress_instance() + env._update_context() + env.reset() + + except Exception as e: + print(f"Cannot instantiate {env_name} environment.") + raise e + + +if __name__ == "__main__": + TestBraxEnvs().test_envs() diff --git a/test/test_context_bounds.py b/test/test_context_bounds.py new file mode 100644 index 00000000..c7ef8561 --- /dev/null +++ b/test/test_context_bounds.py @@ -0,0 +1,62 @@ +import unittest + +from carl.context.utils import get_context_bounds +import numpy as np + + +class TestContextBounds(unittest.TestCase): + def test_context_bounds(self): + DEFAULT_CONTEXT = { + "min_position": -1.2, # unit? + "max_position": 0.6, # unit? + "max_speed": 0.07, # unit? + "goal_position": 0.5, # unit? + "goal_velocity": 0, # unit? + "force": 0.001, # unit? + "gravity": 0.0025, # unit? + "min_position_start": -0.6, + "max_position_start": -0.4, + "min_velocity_start": 0.0, + "max_velocity_start": 0.0, + } + + CONTEXT_BOUNDS = { + "min_position": (-np.inf, np.inf, float), + "max_position": (-np.inf, np.inf, float), + "max_speed": (0, np.inf, float), + "goal_position": (-np.inf, np.inf, float), + "goal_velocity": (-np.inf, np.inf, float), + "force": (-np.inf, np.inf, float), + "gravity": (0, np.inf, float), + "min_position_start": (-np.inf, np.inf, float), + "max_position_start": (-np.inf, np.inf, float), + "min_velocity_start": (-np.inf, np.inf, float), + "max_velocity_start": (-np.inf, np.inf, float), + } + + lower, upper = get_context_bounds(list(DEFAULT_CONTEXT.keys()), CONTEXT_BOUNDS) + + self.assertEqual( + lower.all(), + np.array( + [ + -np.inf, + -np.inf, + 0.0, + -np.inf, + -np.inf, + -np.inf, + 0.0, + -np.inf, + -np.inf, + -np.inf, + -np.inf, + ] + ).all(), + ) + + self.assertEqual(upper.all(), np.array([np.inf] * upper.shape[0]).all()) + + +if __name__ == "__main__": + TestContextBounds.test_context_bounds() From 4a15397b51a0701afc557d0f754e4e7e177ad644 Mon Sep 17 00:00:00 2001 From: amsks Date: Mon, 11 Dec 2023 13:28:23 +0100 Subject: [PATCH 24/44] enhancement: test for search space encoding --- test/test_search_space_encoding.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 test/test_search_space_encoding.py diff --git a/test/test_search_space_encoding.py b/test/test_search_space_encoding.py new file mode 100644 index 00000000..5ce06175 --- /dev/null +++ b/test/test_search_space_encoding.py @@ -0,0 +1,40 @@ +import unittest + +from carl.context.search_space_encoding import search_space_to_config_space +from ConfigSpace import ConfigurationSpace +from omegaconf import DictConfig + + +class TestSearchSpacEncoding(unittest.TestCase): + def setUp(self): + self.test_space = None + self.test_space = ConfigurationSpace( + name="myspace", + space={ + "uniform_integer": (1, 10), + "uniform_float": (1.0, 10.0), + "categorical": ["a", "b", "c"], + "constant": 1337, + }, + ) + return super().setUp() + + def test_config_spaces(self): + try: + search_space_to_config_space(self.test_space) + except Exception as e: + print(f"Cannot encode search space -- {self.test_space}.") + raise e + + def test_dict_configs(self): + try: + dict_space = DictConfig({"hyperparameters": {}}) + + search_space_to_config_space(dict_space) + except Exception as e: + print(f"Cannot encode search space -- {dict_space}.") + raise e + + +if __name__ == "__main__": + unittest.main() From fb22b257b6f0e4e40aedb8de3c1f9e81e6008ab9 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 13:42:04 +0100 Subject: [PATCH 25/44] fully move dmc to gymnasium --- carl/__init__.py | 9 ++++++++- carl/envs/dmc/wrappers.py | 4 ++-- test/test_gymnasium_envs.py | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/carl/__init__.py b/carl/__init__.py index b2d3d700..fa63768c 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -58,7 +58,6 @@ def check_spec(spec_name: str) -> bool: ) return found - # Environment loading found = check_spec("Box2D") if found: for e in envs.gymnasium.box2d.__all__: @@ -67,6 +66,14 @@ def check_spec(spec_name: str) -> bool: entry_point=f"carl.envs.gymnasium.box2d:{e}", ) + found = check_spec("dm_control") + if found: + for e in envs.dmc.__all__: + register( + id=f"carl/{e}-v0", + entry_point=f"carl.envs.dmc:{e}", + ) + found = check_spec("py4j") if found: register( diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index 7ac9a059..665e5c26 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -1,10 +1,10 @@ from typing import Any, Optional, Tuple, TypeVar, Union import dm_env # type: ignore -import gym +import gymnasium as gym import numpy as np from dm_env import StepType -from gym import spaces +from gymnasium import spaces ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") diff --git a/test/test_gymnasium_envs.py b/test/test_gymnasium_envs.py index e2c6ddd0..246f002e 100644 --- a/test/test_gymnasium_envs.py +++ b/test/test_gymnasium_envs.py @@ -27,13 +27,13 @@ class TestGymnasiumRegistration(unittest.TestCase): def test_registration(self): registered_envs = gym.envs.registration.registry.keys() for e in carl.envs.__all__: - if "RNA" not in e and "Brax" not in e and "Dmc" not in e: + if "RNA" not in e and "Brax" not in e: env_name = f"carl/{e}-v0" self.assertTrue(env_name in registered_envs) def test_make(self): for e in carl.envs.__all__: - if "RNA" not in e and "Brax" not in e and "Dmc" not in e: + if "RNA" not in e and "Brax" not in e: env_name = f"carl/{e}-v0" env = gym.make(env_name) self.assertTrue(isinstance(env, gym.Env)) From 0b433046a742e272c49849a0afd33425911fc21f Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 13:46:15 +0100 Subject: [PATCH 26/44] pre-commit fixes --- test/test_context_bounds.py | 3 ++- test/test_search_space_encoding.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_context_bounds.py b/test/test_context_bounds.py index c7ef8561..6c022b8d 100644 --- a/test/test_context_bounds.py +++ b/test/test_context_bounds.py @@ -1,8 +1,9 @@ import unittest -from carl.context.utils import get_context_bounds import numpy as np +from carl.context.utils import get_context_bounds + class TestContextBounds(unittest.TestCase): def test_context_bounds(self): diff --git a/test/test_search_space_encoding.py b/test/test_search_space_encoding.py index 5ce06175..e89d0cc5 100644 --- a/test/test_search_space_encoding.py +++ b/test/test_search_space_encoding.py @@ -1,9 +1,10 @@ import unittest -from carl.context.search_space_encoding import search_space_to_config_space from ConfigSpace import ConfigurationSpace from omegaconf import DictConfig +from carl.context.search_space_encoding import search_space_to_config_space + class TestSearchSpacEncoding(unittest.TestCase): def setUp(self): From 1ba15be817cd5e49a41d3b140449f655b2dd5c47 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 14:00:04 +0100 Subject: [PATCH 27/44] add sb3 example --- examples/carl_with_sb3.py | 38 ++++++++++++++++++++++++++++++++++++++ setup.py | 3 +++ 2 files changed, 41 insertions(+) create mode 100644 examples/carl_with_sb3.py diff --git a/examples/carl_with_sb3.py b/examples/carl_with_sb3.py new file mode 100644 index 00000000..44992c1b --- /dev/null +++ b/examples/carl_with_sb3.py @@ -0,0 +1,38 @@ +import carl +import gymnasium as gym +from gymnasium.wrappers import FlattenObservation +from stable_baselines3 import DQN +from stable_baselines3.common.evaluation import evaluate_policy + +from carl.envs import CARLLunarLander +from carl.context.context_space import NormalFloatContextFeature +from carl.context.sampler import ContextSampler + +# Create environment +context_distributions = [NormalFloatContextFeature("GRAVITY_X", mu=9.8, sigma=1)] +context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLLunarLander.get_context_space(), + seed=42, + ) +contexts = context_sampler.sample_contexts(n_contexts=5) + +print("Training contexts are:") +print(contexts) + +env = gym.make("carl/CARLLunarLander-v0", render_mode="rgb_array", contexts=contexts) +env = FlattenObservation(env) + +# Instantiate the agent +model = DQN("MlpPolicy", env, verbose=1) +# Train the agent and display a progress bar +model.learn(total_timesteps=int(2e4), progress_bar=True) +mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10) + +# Enjoy trained agent +vec_env = model.get_env() +obs = vec_env.reset() +for i in range(1000): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = vec_env.step(action) + vec_env.render("human") \ No newline at end of file diff --git a/setup.py b/setup.py index d1f48f8b..ba5712e8 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,9 @@ def read_file(filepath: str) -> str: "sphinx-autoapi>=1.8.4", "automl-sphinx-theme>=0.1.9", ], + "examples": [ + "stable-baselines3", + ] } setuptools.setup( From 09ae3ce02ca12e3654e8b5fc12b33c9887abaa49 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 14:03:27 +0100 Subject: [PATCH 28/44] fix pre-commit --- examples/carl_with_sb3.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/carl_with_sb3.py b/examples/carl_with_sb3.py index 44992c1b..01109ef4 100644 --- a/examples/carl_with_sb3.py +++ b/examples/carl_with_sb3.py @@ -11,10 +11,10 @@ # Create environment context_distributions = [NormalFloatContextFeature("GRAVITY_X", mu=9.8, sigma=1)] context_sampler = ContextSampler( - context_distributions=context_distributions, - context_space=CARLLunarLander.get_context_space(), - seed=42, - ) + context_distributions=context_distributions, + context_space=CARLLunarLander.get_context_space(), + seed=42, +) contexts = context_sampler.sample_contexts(n_contexts=5) print("Training contexts are:") @@ -35,4 +35,4 @@ for i in range(1000): action, _states = model.predict(obs, deterministic=True) obs, rewards, dones, info = vec_env.step(action) - vec_env.render("human") \ No newline at end of file + vec_env.render("human") From a55850876849e80eb406d3dd284a368f6a32c85d Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 14:04:32 +0100 Subject: [PATCH 29/44] added examples to formatting --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee8e8b60..474acc1b 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ pre-commit: $(PRECOMMIT) run --all-files format-black: - $(BLACK) carl test + $(BLACK) carl test examples format-isort: $(ISORT) carl test From 36f2c3b913304e5202b4b99e9c4ed0b1f49a9175 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 15:14:37 +0100 Subject: [PATCH 30/44] cov reporting makro --- Makefile | 4 ++-- pyproject.toml | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 474acc1b..6c5fce1c 100644 --- a/Makefile +++ b/Makefile @@ -73,8 +73,8 @@ format: format-black format-isort test: $(PYTEST) test -test-cov: - $(PYTEST) test --cov=carl --cov-report=html:test_coverage_carl +cov-report: + coverage html -d coverage_html clean-doc: $(MAKE) -C ${DOCDIR} clean diff --git a/pyproject.toml b/pyproject.toml index 588d7984..9d01dce0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,11 +4,18 @@ [tool.pytest.ini_options] testpaths = ["test"] minversion = "3.9" -addopts = "--cov=carl" +addopts="--cov=carl" [tool.coverage.run] branch = true -context = "carl" +include = ["carl/*"] +omit = [ + "*/mario/pcg_smb_env/*", + "*/rna/*", + "*/utils/doc_building/*", + "*/mario/models/*", + "__init__.py" +] [tool.coverage.report] show_missing = true @@ -19,6 +26,13 @@ exclude_lines = [ "raise NotImplementedError", "if TYPE_CHECKING" ] +omit = [ + "*/mario/pcg_smb_env/*", + "*/rna/*", + "*/utils/doc_building/*", + "*/mario/models/*", + "__init__.py" +] [tool.black] target-version = ['py39'] From 4eca8011166a65a61eb093d661e6e15bdbb86078 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 15:34:47 +0100 Subject: [PATCH 31/44] some more tests --- carl/context/context_space.py | 2 +- carl/envs/gymnasium/carl_gymnasium_env.py | 6 +-- test/test_all_envs.py | 1 + test/test_context_sampler.py | 15 +++++++ test/test_context_space.py | 19 +++++++++ test/test_search_space_encoding.py | 50 ++++++++++++++++++++--- 6 files changed, 83 insertions(+), 10 deletions(-) diff --git a/carl/context/context_space.py b/carl/context/context_space.py index 4409a370..8a8cf5ce 100644 --- a/carl/context/context_space.py +++ b/carl/context/context_space.py @@ -219,7 +219,7 @@ def sample_contexts( contexts = [] for _ in range(size): - context = {cf.name: cf.sample() for cf in self.context_space.values()} + context = {cf.name: cf.rvs() for cf in self.context_space.values()} context = self.insert_defaults(context, context_keys) contexts += [context] diff --git a/carl/envs/gymnasium/carl_gymnasium_env.py b/carl/envs/gymnasium/carl_gymnasium_env.py index 856e79b7..ca7c773e 100644 --- a/carl/envs/gymnasium/carl_gymnasium_env.py +++ b/carl/envs/gymnasium/carl_gymnasium_env.py @@ -10,10 +10,10 @@ try: pygame.display.init() -except: - import os +except: # pragma: no cover + import os # pragma: no cover - os.environ["SDL_VIDEODRIVER"] = "dummy" + os.environ["SDL_VIDEODRIVER"] = "dummy" # pragma: no cover class CARLGymnasiumEnv(CARLEnv): diff --git a/test/test_all_envs.py b/test/test_all_envs.py index 141b06b4..68f57ccf 100644 --- a/test/test_all_envs.py +++ b/test/test_all_envs.py @@ -16,6 +16,7 @@ def test_init_all_envs(self): env = ( # noqa: F841 local variable is assigned to but never used var() ) + env.reset() except Exception as e: print(f"Cannot instantiate {var} environment.") raise e diff --git a/test/test_context_sampler.py b/test/test_context_sampler.py index 111255ec..0bad9b2f 100644 --- a/test/test_context_sampler.py +++ b/test/test_context_sampler.py @@ -1,4 +1,5 @@ import unittest +from omegaconf import DictConfig from carl.context.context_space import ( ContextSpace, @@ -44,11 +45,25 @@ def test_init(self): name="TestSampler", ) + with self.assertRaises(ValueError): + ContextSampler( + context_distributions=0, + context_space=self.cspace, + seed=0, + name="TestSampler", + ) + def test_sample_contexts(self): contexts = self.sampler.sample_contexts(n_contexts=3) self.assertEqual(len(contexts), 3) self.assertEqual(contexts[0]["gravity"], 9.8) + contexts = self.sampler.sample_contexts(n_contexts=1) + self.assertEqual(len(contexts), 1) + self.assertEqual(contexts[0]["gravity"], 9.8) + + + if __name__ == "__main__": unittest.main() diff --git a/test/test_context_space.py b/test/test_context_space.py index 00478679..e8c60a65 100644 --- a/test/test_context_space.py +++ b/test/test_context_space.py @@ -88,6 +88,25 @@ def test_verify_context(self): is_valid = self.context_space.verify_context(context) self.assertEqual(is_valid, False) + def test_sample(self): + context = self.context_space.sample_contexts(["gravity"], size=1) + is_valid = self.context_space.verify_context(context) + self.assertEqual(is_valid, True) + + contexts = self.context_space.sample_contexts(["gravity"], size=10) + self.assertTrue(len(contexts) == 10) + for context in contexts: + is_valid = self.context_space.verify_context(context) + self.assertEqual(is_valid, True) + + contexts = self.context_space.sample_contexts(None, size=10) + self.assertTrue(len(contexts) == 10) + for context in contexts: + is_valid = self.context_space.verify_context(context) + self.assertEqual(is_valid, True) + + with self.assertRaises(ValueError): + self.context_space.sample_contexts(["false_feature"], size=0) if __name__ == "__main__": unittest.main() diff --git a/test/test_search_space_encoding.py b/test/test_search_space_encoding.py index e89d0cc5..55acd258 100644 --- a/test/test_search_space_encoding.py +++ b/test/test_search_space_encoding.py @@ -6,20 +6,58 @@ from carl.context.search_space_encoding import search_space_to_config_space +dict_space = { + "uniform_integer": (1, 10), + "uniform_float": (1.0, 10.0), + "categorical": ["a", "b", "c"], + "constant": 1337, + } + +dict_space_2 = { "hyperparameters": [ + {"name": "x0", + "type": "uniform_float", + "log": False, + "lower": -512.0, + "upper": 512.0, + "default": -3.0}, + {"name": "x1", + "type": "uniform_float", + "log": False, + "lower": -512.0, + "upper": 512.0, + "default": -4.0}], + "conditions": [], + "forbiddens": [], + "python_module_version": "0.4.17", + "json_format_version": 0.2} + +str_space = """{ + "uniform_integer": (1, 10), + "uniform_float": (1.0, 10.0), + "categorical": ["a", "b", "c"], + "constant": 1337, + }""" + class TestSearchSpacEncoding(unittest.TestCase): def setUp(self): self.test_space = None self.test_space = ConfigurationSpace( name="myspace", - space={ - "uniform_integer": (1, 10), - "uniform_float": (1.0, 10.0), - "categorical": ["a", "b", "c"], - "constant": 1337, - }, + space=dict_space ) return super().setUp() + def test_init(self): + self.test_space = ConfigurationSpace( + name="myspace", + space=dict_space_2 + ) + + self.test_space = ConfigurationSpace( + name="myspace", + space=str_space + ) + def test_config_spaces(self): try: search_space_to_config_space(self.test_space) From 897f373f547c74e9f3f37614ad506d5f2c97ff81 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Mon, 11 Dec 2023 15:38:04 +0100 Subject: [PATCH 32/44] fix pre-commit --- carl/envs/gymnasium/carl_gymnasium_env.py | 6 +-- test/test_context_sampler.py | 5 +- test/test_context_space.py | 1 + test/test_search_space_encoding.py | 62 +++++++++++------------ 4 files changed, 35 insertions(+), 39 deletions(-) diff --git a/carl/envs/gymnasium/carl_gymnasium_env.py b/carl/envs/gymnasium/carl_gymnasium_env.py index ca7c773e..fb96d6d8 100644 --- a/carl/envs/gymnasium/carl_gymnasium_env.py +++ b/carl/envs/gymnasium/carl_gymnasium_env.py @@ -10,10 +10,10 @@ try: pygame.display.init() -except: # pragma: no cover - import os # pragma: no cover +except: # pragma: no cover + import os # pragma: no cover - os.environ["SDL_VIDEODRIVER"] = "dummy" # pragma: no cover + os.environ["SDL_VIDEODRIVER"] = "dummy" # pragma: no cover class CARLGymnasiumEnv(CARLEnv): diff --git a/test/test_context_sampler.py b/test/test_context_sampler.py index 0bad9b2f..7a1b73ac 100644 --- a/test/test_context_sampler.py +++ b/test/test_context_sampler.py @@ -1,5 +1,4 @@ import unittest -from omegaconf import DictConfig from carl.context.context_space import ( ContextSpace, @@ -47,7 +46,7 @@ def test_init(self): with self.assertRaises(ValueError): ContextSampler( - context_distributions=0, + context_distributions=0, context_space=self.cspace, seed=0, name="TestSampler", @@ -63,7 +62,5 @@ def test_sample_contexts(self): self.assertEqual(contexts[0]["gravity"], 9.8) - - if __name__ == "__main__": unittest.main() diff --git a/test/test_context_space.py b/test/test_context_space.py index e8c60a65..10c8c953 100644 --- a/test/test_context_space.py +++ b/test/test_context_space.py @@ -108,5 +108,6 @@ def test_sample(self): with self.assertRaises(ValueError): self.context_space.sample_contexts(["false_feature"], size=0) + if __name__ == "__main__": unittest.main() diff --git a/test/test_search_space_encoding.py b/test/test_search_space_encoding.py index 55acd258..42a0489d 100644 --- a/test/test_search_space_encoding.py +++ b/test/test_search_space_encoding.py @@ -5,31 +5,37 @@ from carl.context.search_space_encoding import search_space_to_config_space - dict_space = { - "uniform_integer": (1, 10), - "uniform_float": (1.0, 10.0), - "categorical": ["a", "b", "c"], - "constant": 1337, - } + "uniform_integer": (1, 10), + "uniform_float": (1.0, 10.0), + "categorical": ["a", "b", "c"], + "constant": 1337, +} -dict_space_2 = { "hyperparameters": [ - {"name": "x0", - "type": "uniform_float", - "log": False, - "lower": -512.0, - "upper": 512.0, - "default": -3.0}, - {"name": "x1", - "type": "uniform_float", - "log": False, - "lower": -512.0, - "upper": 512.0, - "default": -4.0}], +dict_space_2 = { + "hyperparameters": [ + { + "name": "x0", + "type": "uniform_float", + "log": False, + "lower": -512.0, + "upper": 512.0, + "default": -3.0, + }, + { + "name": "x1", + "type": "uniform_float", + "log": False, + "lower": -512.0, + "upper": 512.0, + "default": -4.0, + }, + ], "conditions": [], "forbiddens": [], "python_module_version": "0.4.17", - "json_format_version": 0.2} + "json_format_version": 0.2, +} str_space = """{ "uniform_integer": (1, 10), @@ -38,25 +44,17 @@ "constant": 1337, }""" + class TestSearchSpacEncoding(unittest.TestCase): def setUp(self): self.test_space = None - self.test_space = ConfigurationSpace( - name="myspace", - space=dict_space - ) + self.test_space = ConfigurationSpace(name="myspace", space=dict_space) return super().setUp() def test_init(self): - self.test_space = ConfigurationSpace( - name="myspace", - space=dict_space_2 - ) + self.test_space = ConfigurationSpace(name="myspace", space=dict_space_2) - self.test_space = ConfigurationSpace( - name="myspace", - space=str_space - ) + self.test_space = ConfigurationSpace(name="myspace", space=str_space) def test_config_spaces(self): try: From ce69cc46bca058000c7602dd5f3141a9a92dcec5 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Tue, 9 Jan 2024 15:43:59 +0100 Subject: [PATCH 33/44] tests running --- Makefile | 2 +- carl/envs/brax/brax_walker_goal_wrapper.py | 77 ++++---- carl/envs/brax/carl_brax_env.py | 30 +++- carl/envs/brax/carl_halfcheetah.py | 2 +- carl/envs/brax/carl_hopper.py | 2 +- carl/envs/brax/carl_humanoid.py | 2 +- carl/envs/brax/carl_walker2d.py | 2 +- examples/brax_with_goals.ipynb | 43 ++--- test/test_language_goals.py | 195 ++++++++++++--------- 9 files changed, 202 insertions(+), 153 deletions(-) diff --git a/Makefile b/Makefile index 995451d5..0cc36ae6 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ format-isort: format: format-black format-isort test: - $(PYTEST) test + $(PYTEST) --disable-warnings test clean-doc: $(MAKE) -C ${DOCDIR} clean diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index 7a50de60..6158b373 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -1,12 +1,14 @@ import gym import numpy as np +from brax.io import mjcf +from etils import epath STATE_INDICES = { - "CARLAnt": [13, 14], - "CARLHumanoid": [22, 23], - "CARLHalfcheetah": [14, 15], - "CARLHopper": [5, 6], - "CARLWalker2d": [8, 9], + "ant": [13, 14], + "humanoid": [22, 23], + "halfcheetah": [14, 15], + "hopper": [5, 6], + "walker2d": [8, 9], } DIRECTION_NAMES = { @@ -51,15 +53,17 @@ class BraxWalkerGoalWrapper(gym.Wrapper): """Adds a positional goal to brax walker envs""" - def __init__(self, env) -> None: + def __init__(self, env, env_name, asset_path) -> None: super().__init__(env) + self.env_name = env_name if ( - self.env.__class__.__name__ == "CARLHumanoid" - or self.env.__class__.__name__ == "CARLHalfcheetah" - or self.env.__class__.__name__ == "CARLHopper" - or self.env.__class__.__name__ == "CARLWalker2d" + self.env_name == "humanoid" + or self.env_name == "halfcheetah" + or self.env_name == "hopper" + or self.env_name == "walker2d" ): self.env._forward_reward_weight = 0 + self.context = None self.position = None self.goal_position = None self.direction_values = { @@ -101,38 +105,39 @@ def __init__(self, env) -> None: ], 212: [np.sin(22.5 * np.pi / 180), np.cos(22.5 * np.pi / 180)], } + path = epath.resource_path("brax") / asset_path + sys = mjcf.load(path) + self.dt = sys.dt - def reset(self, return_info=False): - state, info = self.env.reset(info=True) + def reset(self, seed=None, options={}): + state, info = self.env.reset(seed=seed, options=options) self.position = (0, 0) self.goal_position = ( np.array(self.direction_values[self.context["target_direction"]]) * self.context["target_distance"] ) - if return_info: - info["success"] = 0 - return state, info - else: - return state + info["success"] = 0 + return state, info + def step(self, action): - state, _, done, info = self.env.step(action) - indices = STATE_INDICES[self.env.__class__.__name__] + state, _, te, tr, info = self.env.step(action) + indices = STATE_INDICES[self.env_name] new_position = ( np.array(list(self.position)) + np.array([state[indices[0]], state[indices[1]]]) - * self.env.env.sys.config.dt + * self.dt ) current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) self.position = new_position if abs(current_distance_to_goal) <= 5: - done = True + te = True info["success"] = 1 else: info["success"] = 0 - return state, direction_reward, done, info + return state, direction_reward, te, tr, info class BraxLanguageWrapper(gym.Wrapper): @@ -140,21 +145,27 @@ class BraxLanguageWrapper(gym.Wrapper): def __init__(self, env) -> None: super().__init__(env) + self.context = None - def reset(self, return_info=False): - state, info = self.env.reset(info=True) - goal_str = self.get_goal_desc(info["context"]) - extended_state = {"env_state": state, "goal": goal_str} - if return_info: - return extended_state, info + def reset(self, seed=None, options={}): + print(self.context) + self.env.context = self.context + state, info = self.env.reset(seed=seed, options=options) + goal_str = self.get_goal_desc(self.context) + if isinstance(state, dict): + state["goal"] = goal_str else: - return extended_state + state = {"obs": state, "goal": goal_str} + return state, info def step(self, action): - state, reward, done, info = self.env.step(action) - goal_str = self.get_goal_desc(info["context"]) - extended_state = {"env_state": state, "goal": goal_str} - return extended_state, reward, done, info + state, reward, te, tr, info = self.env.step(action) + goal_str = self.get_goal_desc(self.context) + if isinstance(state, dict): + state["goal"] = goal_str + else: + state = {"obs": state, "goal": goal_str} + return state, reward, te, tr, info def get_goal_desc(self, context): if "target_radius" in context.keys(): diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index cba21ec8..f3d2337f 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -211,11 +211,11 @@ def __init__( if contexts is not None: if ( - "target_distance" in contexts[contexts.keys()[0]] - or "target_direction" in contexts[contexts.keys()[0]] + "target_distance" in contexts[list(contexts.keys())[0]].keys() + or "target_direction" in contexts[list(contexts.keys())[0]].keys() ): - base_dir = contexts[contexts.keys()[0]]["target_direction"] - base_dist = contexts[contexts.keys()[0]]["target_distance"] + base_dir = contexts[list(contexts.keys())[0]]["target_direction"] + base_dist = contexts[list(contexts.keys())[0]]["target_distance"] max_diff_dir = max( [c["target_direction"] - base_dir for c in contexts.values()] ) @@ -223,9 +223,10 @@ def __init__( [c["target_distance"] - base_dist for c in contexts.values()] ) if max_diff_dir > 0.1 or max_diff_dist > 0.1: - env = BraxWalkerGoalWrapper(env) + env = BraxWalkerGoalWrapper(env, self.env_name, self.asset_path) if use_language_goals: - env = BraxLanguageWrapper(env, contexts) + env = BraxLanguageWrapper(env) + self.use_language_goals = use_language_goals super().__init__( env=env, @@ -236,6 +237,7 @@ def __init__( context_selector_kwargs=context_selector_kwargs, **kwargs, ) + self.env.context = self.context def _update_context(self) -> None: context = self.context @@ -247,6 +249,8 @@ def _update_context(self) -> None: "gravity", "viscosity", "elasticity", + "target_distance", + "target_direction", ] check_context(context, registered_cfs) @@ -275,3 +279,17 @@ def _update_context(self) -> None: sys = sys.replace(geoms=updated_geoms) self.env.unwrapped.sys = sys + + def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]: + """Overwrites reset in super to update context in wrapper.""" + last_context_id = self.context_id + self._progress_instance() + if self.context_id != last_context_id: + self._update_context() + #if self.use_language_goals: + #self.env.env.context = self.context + self.env.context = self.context + state, info = self.env.reset(seed=seed, options=options) + state = self._add_context_to_state(state) + info["context_id"] = self.context_id + return state, info diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index 171015b1..8780c2a7 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -58,6 +58,6 @@ def get_context_features() -> dict[str, ContextFeature]: "target_distance", lower=0, upper=np.inf, default_value=0 ), "target_direction": CategoricalContextFeature( - "target_direction", choices=directions, default_value=0 + "target_direction", choices=directions, default_value=1 ), } diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index 138b580a..ff28c33f 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -49,6 +49,6 @@ def get_context_features() -> dict[str, ContextFeature]: "target_distance", lower=0, upper=np.inf, default_value=0 ), "target_direction": CategoricalContextFeature( - "target_direction", choices=directions, default_value=0 + "target_direction", choices=directions, default_value=1 ), } diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 1918b4bc..4aff5139 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -76,6 +76,6 @@ def get_context_features() -> dict[str, ContextFeature]: "target_distance", lower=0, upper=np.inf, default_value=0 ), "target_direction": CategoricalContextFeature( - "target_direction", choices=directions, default_value=0 + "target_direction", choices=directions, default_value=1 ), } diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index 7d4f92ca..36f61380 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -58,6 +58,6 @@ def get_context_features() -> dict[str, ContextFeature]: "target_distance", lower=0, upper=np.inf, default_value=0 ), "target_direction": CategoricalContextFeature( - "target_direction", choices=directions, default_value=0 + "target_direction", choices=directions, default_value=1 ), } diff --git a/examples/brax_with_goals.ipynb b/examples/brax_with_goals.ipynb index a2eada23..c3a68c1f 100644 --- a/examples/brax_with_goals.ipynb +++ b/examples/brax_with_goals.ipynb @@ -9,18 +9,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/theeimer/Documents/git/CARL/carl/envs/__init__.py:28: UserWarning: Module py4j not found. If you want to use these environments, please follow the installation guide.\n", + "/Users/theeimer/anaconda3/envs/carl/lib/python3.9/site-packages/carl/envs/__init__.py:31: UserWarning: Module py4j not found. If you want to use these environments, please follow the installation guide.\n", " warnings.warn(\n", - "/Users/theeimer/Documents/git/CARL/carl/envs/__init__.py:28: UserWarning: Module distance not found. If you want to use these environments, please follow the installation guide.\n", + "/Users/theeimer/anaconda3/envs/carl/lib/python3.9/site-packages/carl/envs/__init__.py:31: UserWarning: Module distance not found. If you want to use these environments, please follow the installation guide.\n", + " warnings.warn(\n", + "/Users/theeimer/anaconda3/envs/carl/lib/python3.9/site-packages/carl/__init__.py:55: UserWarning: Module py4j not found. If you want to use these environments,\n", + " please follow the installation guide.\n", " warnings.warn(\n" ] } ], "source": [ "import matplotlib.pyplot as plt\n", - "from carl.context.context_space import NormalFloatContextFeature\n", + "from carl.context.context_space import NormalFloatContextFeature, CategoricalContextFeature\n", "from carl.context.sampler import ContextSampler\n", - "from carl.envs import CARLBraxAnt, CARLBraxPusher\n" + "from carl.envs import CARLBraxAnt, CARLBraxPusher\n", + "from carl.envs.brax.brax_walker_goal_wrapper import directions\n" ] }, { @@ -32,21 +36,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{0: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 8.82272212012359, 'target_direction': 11.564052345967665}, 1: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 10.75008841752559, 'target_direction': 10.200157208367225}, 2: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 9.648642791702303, 'target_direction': 10.77873798410574}, 3: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 9.696781148206442, 'target_direction': 12.04089319920146}, 4: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_distance': 10.210598501938373, 'target_direction': 11.667557990149968}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/theeimer/Documents/git/CARL/carl/envs/brax/carl_ant.py:26: RuntimeWarning: invalid value encountered in scalar divide\n", - " \"ang_damping\": UniformFloatContextFeature(\n" + "{0: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_direction': 112, 'target_distance': 8.957275946170714}, 1: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_direction': 334, 'target_distance': 11.769924447869895}, 2: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_direction': 332, 'target_distance': 11.066118529857778}, 3: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_direction': 112, 'target_distance': 9.294123460239488}, 4: {'gravity': -9.8, 'friction': 1.0, 'elasticity': 0.0, 'ang_damping': -0.05, 'mass_torso': 10.0, 'viscosity': 0.0, 'target_direction': 14, 'target_distance': 12.345200778471055}}\n" ] } ], "source": [ "seed = 0\n", - "context_distributions = [NormalFloatContextFeature(\"target_distance\", mu=9.8, sigma=1), NormalFloatContextFeature(\"target_direction\", mu=9.8, sigma=1)]\n", + "context_distributions = [NormalFloatContextFeature(\"target_distance\", mu=9.8, sigma=1), CategoricalContextFeature(\"target_direction\", choices=directions)]\n", "context_sampler = ContextSampler(\n", " context_distributions=context_distributions,\n", " context_space=CARLBraxAnt.get_context_space(),\n", @@ -62,19 +58,18 @@ "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "'mujoco._structs.MjOption' object has no attribute 'collision'", + "ename": "KeyError", + "evalue": "'target_direction'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/Users/theeimer/Documents/git/CARL/examples/brax_with_goals.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m env \u001b[39m=\u001b[39m CARLBraxAnt(contexts\u001b[39m=\u001b[39;49mcontexts, use_language_goals\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 2\u001b[0m env\u001b[39m.\u001b[39mreset()\n\u001b[1;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCurrent context ID: \u001b[39m\u001b[39m{\u001b[39;00menv\u001b[39m.\u001b[39mcontext_id\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", - "File \u001b[0;32m~/Documents/git/CARL/carl/envs/brax/carl_brax_env.py:193\u001b[0m, in \u001b[0;36mCARLBraxEnv.__init__\u001b[0;34m(self, env, batch_size, contexts, obs_context_features, obs_context_as_dict, context_selector, context_selector_kwargs, use_language_goals, **kwargs)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[39mif\u001b[39;00m env \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m bs \u001b[39m=\u001b[39m batch_size \u001b[39mif\u001b[39;00m batch_size \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m--> 193\u001b[0m env \u001b[39m=\u001b[39m brax\u001b[39m.\u001b[39;49menvs\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 194\u001b[0m env_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49menv_name, backend\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbackend, batch_size\u001b[39m=\u001b[39;49mbs\n\u001b[1;32m 195\u001b[0m )\n\u001b[1;32m 196\u001b[0m \u001b[39m# Brax uses gym instead of gymnasium\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[39mif\u001b[39;00m batch_size \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n", - "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/envs/__init__.py:95\u001b[0m, in \u001b[0;36mcreate\u001b[0;34m(env_name, episode_length, action_repeat, auto_reset, batch_size, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 75\u001b[0m env_name: \u001b[39mstr\u001b[39m,\n\u001b[1;32m 76\u001b[0m episode_length: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39m1000\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 81\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Env:\n\u001b[1;32m 82\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Creates an environment from the registry.\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \n\u001b[1;32m 84\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[39m env: an environment\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 95\u001b[0m env \u001b[39m=\u001b[39m _envs[env_name](\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 97\u001b[0m \u001b[39mif\u001b[39;00m episode_length \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 98\u001b[0m env \u001b[39m=\u001b[39m training\u001b[39m.\u001b[39mEpisodeWrapper(env, episode_length, action_repeat)\n", - "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/envs/ant.py:161\u001b[0m, in \u001b[0;36mAnt.__init__\u001b[0;34m(self, ctrl_cost_weight, use_contact_forces, contact_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_z_range, contact_force_range, reset_noise_scale, exclude_current_positions_from_observation, backend, **kwargs)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 147\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 148\u001b[0m ctrl_cost_weight\u001b[39m=\u001b[39m\u001b[39m0.5\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[1;32m 159\u001b[0m ):\n\u001b[1;32m 160\u001b[0m path \u001b[39m=\u001b[39m epath\u001b[39m.\u001b[39mresource_path(\u001b[39m'\u001b[39m\u001b[39mbrax\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m/\u001b[39m \u001b[39m'\u001b[39m\u001b[39menvs/assets/ant.xml\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m--> 161\u001b[0m sys \u001b[39m=\u001b[39m mjcf\u001b[39m.\u001b[39;49mload(path)\n\u001b[1;32m 163\u001b[0m n_frames \u001b[39m=\u001b[39m \u001b[39m5\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[39mif\u001b[39;00m backend \u001b[39min\u001b[39;00m [\u001b[39m'\u001b[39m\u001b[39mspring\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mpositional\u001b[39m\u001b[39m'\u001b[39m]:\n", - "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/io/mjcf.py:520\u001b[0m, in \u001b[0;36mload\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 517\u001b[0m xml \u001b[39m=\u001b[39m ElementTree\u001b[39m.\u001b[39mtostring(elem, encoding\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39municode\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 518\u001b[0m mj \u001b[39m=\u001b[39m mujoco\u001b[39m.\u001b[39mMjModel\u001b[39m.\u001b[39mfrom_xml_string(xml, assets\u001b[39m=\u001b[39massets)\n\u001b[0;32m--> 520\u001b[0m \u001b[39mreturn\u001b[39;00m load_model(mj)\n", - "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/brax/io/mjcf.py:254\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(mj)\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[39mif\u001b[39;00m (mj\u001b[39m.\u001b[39mgeom_priority[\u001b[39m0\u001b[39m] \u001b[39m!=\u001b[39m mj\u001b[39m.\u001b[39mgeom_priority)\u001b[39m.\u001b[39many():\n\u001b[1;32m 253\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mgeom_priority parameter not supported.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 254\u001b[0m \u001b[39mif\u001b[39;00m mj\u001b[39m.\u001b[39;49mopt\u001b[39m.\u001b[39;49mcollision \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 255\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mPredefined collisions not supported.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 256\u001b[0m q_width \u001b[39m=\u001b[39m {\u001b[39m0\u001b[39m: \u001b[39m7\u001b[39m, \u001b[39m1\u001b[39m: \u001b[39m4\u001b[39m, \u001b[39m2\u001b[39m: \u001b[39m1\u001b[39m, \u001b[39m3\u001b[39m: \u001b[39m1\u001b[39m}\n", - "\u001b[0;31mAttributeError\u001b[0m: 'mujoco._structs.MjOption' object has no attribute 'collision'" + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/theeimer/Documents/git/CARL/examples/brax_with_goals.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m env \u001b[39m=\u001b[39m CARLBraxAnt(contexts\u001b[39m=\u001b[39;49mcontexts, use_language_goals\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 2\u001b[0m env\u001b[39m.\u001b[39mreset()\n\u001b[1;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCurrent context ID: \u001b[39m\u001b[39m{\u001b[39;00menv\u001b[39m.\u001b[39mcontext_id\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/carl/envs/brax/carl_brax_env.py:207\u001b[0m, in \u001b[0;36mCARLBraxEnv.__init__\u001b[0;34m(self, env, batch_size, contexts, obs_context_features, obs_context_as_dict, context_selector, context_selector_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[39m# The observation space also needs to from gymnasium\u001b[39;00m\n\u001b[1;32m 201\u001b[0m env\u001b[39m.\u001b[39mobservation_space \u001b[39m=\u001b[39m gymnasium\u001b[39m.\u001b[39mspaces\u001b[39m.\u001b[39mBox(\n\u001b[1;32m 202\u001b[0m low\u001b[39m=\u001b[39menv\u001b[39m.\u001b[39mobservation_space\u001b[39m.\u001b[39mlow,\n\u001b[1;32m 203\u001b[0m high\u001b[39m=\u001b[39menv\u001b[39m.\u001b[39mobservation_space\u001b[39m.\u001b[39mhigh,\n\u001b[1;32m 204\u001b[0m dtype\u001b[39m=\u001b[39mnp\u001b[39m.\u001b[39mfloat32,\n\u001b[1;32m 205\u001b[0m )\n\u001b[0;32m--> 207\u001b[0m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\n\u001b[1;32m 208\u001b[0m env\u001b[39m=\u001b[39;49menv,\n\u001b[1;32m 209\u001b[0m contexts\u001b[39m=\u001b[39;49mcontexts,\n\u001b[1;32m 210\u001b[0m obs_context_features\u001b[39m=\u001b[39;49mobs_context_features,\n\u001b[1;32m 211\u001b[0m obs_context_as_dict\u001b[39m=\u001b[39;49mobs_context_as_dict,\n\u001b[1;32m 212\u001b[0m context_selector\u001b[39m=\u001b[39;49mcontext_selector,\n\u001b[1;32m 213\u001b[0m context_selector_kwargs\u001b[39m=\u001b[39;49mcontext_selector_kwargs,\n\u001b[1;32m 214\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs,\n\u001b[1;32m 215\u001b[0m )\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/carl/envs/carl_env.py:110\u001b[0m, in \u001b[0;36mCARLEnv.__init__\u001b[0;34m(self, env, contexts, obs_context_features, obs_context_as_dict, context_selector, context_selector_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 105\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 106\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mContext selector must be None or an AbstractSelector class or instance. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 107\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mGot type \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(context_selector)\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 108\u001b[0m )\n\u001b[0;32m--> 110\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobservation_space: gymnasium\u001b[39m.\u001b[39mspaces\u001b[39m.\u001b[39mDict \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_observation_space(\n\u001b[1;32m 111\u001b[0m obs_context_feature_names\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mobs_context_features\n\u001b[1;32m 112\u001b[0m )\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/carl/envs/carl_env.py:177\u001b[0m, in \u001b[0;36mCARLEnv.get_observation_space\u001b[0;34m(self, obs_context_feature_names)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Get the observation space for the context.\u001b[39;00m\n\u001b[1;32m 163\u001b[0m \n\u001b[1;32m 164\u001b[0m \u001b[39mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[39m underlying environment (\"state\") and for the context (\"context\").\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 176\u001b[0m context_space \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_context_space()\n\u001b[0;32m--> 177\u001b[0m obs_space_context \u001b[39m=\u001b[39m context_space\u001b[39m.\u001b[39;49mto_gymnasium_space(\n\u001b[1;32m 178\u001b[0m context_feature_names\u001b[39m=\u001b[39;49mobs_context_feature_names,\n\u001b[1;32m 179\u001b[0m as_dict\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mobs_context_as_dict,\n\u001b[1;32m 180\u001b[0m )\n\u001b[1;32m 182\u001b[0m obs_space \u001b[39m=\u001b[39m spaces\u001b[39m.\u001b[39mDict(\n\u001b[1;32m 183\u001b[0m {\n\u001b[1;32m 184\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mobs\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbase_observation_space,\n\u001b[1;32m 185\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mcontext\u001b[39m\u001b[39m\"\u001b[39m: obs_space_context,\n\u001b[1;32m 186\u001b[0m }\n\u001b[1;32m 187\u001b[0m )\n\u001b[1;32m 188\u001b[0m \u001b[39mreturn\u001b[39;00m obs_space\n", + "File \u001b[0;32m~/anaconda3/envs/carl/lib/python3.9/site-packages/carl/context/context_space.py:170\u001b[0m, in \u001b[0;36mContextSpace.to_gymnasium_space\u001b[0;34m(self, context_feature_names, as_dict)\u001b[0m\n\u001b[1;32m 167\u001b[0m context_space \u001b[39m=\u001b[39m {}\n\u001b[1;32m 169\u001b[0m \u001b[39mfor\u001b[39;00m cf_name \u001b[39min\u001b[39;00m context_feature_names:\n\u001b[0;32m--> 170\u001b[0m context_feature \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcontext_space[cf_name]\n\u001b[1;32m 171\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(context_feature, NumericalContextFeature):\n\u001b[1;32m 172\u001b[0m context_space[context_feature\u001b[39m.\u001b[39mname] \u001b[39m=\u001b[39m spaces\u001b[39m.\u001b[39mBox(\n\u001b[1;32m 173\u001b[0m low\u001b[39m=\u001b[39mcontext_feature\u001b[39m.\u001b[39mlower, high\u001b[39m=\u001b[39mcontext_feature\u001b[39m.\u001b[39mupper\n\u001b[1;32m 174\u001b[0m )\n", + "\u001b[0;31mKeyError\u001b[0m: 'target_direction'" ] } ], diff --git a/test/test_language_goals.py b/test/test_language_goals.py index c6afb379..af0504c1 100644 --- a/test/test_language_goals.py +++ b/test/test_language_goals.py @@ -1,13 +1,9 @@ import unittest -from carl.envs.brax import ( - BraxLanguageWrapper, - BraxWalkerGoalWrapper, - CARLAnt, - CARLFetch, - CARLHalfcheetah, - sample_walker_language_goals, -) +from carl.context.context_space import NormalFloatContextFeature, CategoricalContextFeature +from carl.context.sampler import ContextSampler +from carl.envs import CARLBraxAnt, CARLBraxHalfcheetah +from carl.envs.brax.brax_walker_goal_wrapper import BraxLanguageWrapper, BraxWalkerGoalWrapper DIRECTIONS = [ 1, # north @@ -31,7 +27,13 @@ class TestGoalSampling(unittest.TestCase): def test_uniform_sampling(self): - contexts = sample_walker_language_goals(10, low=4, high=200) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) assert len(contexts.keys()) == 10 assert "target_distance" in contexts[0].keys() assert "target_direction" in contexts[0].keys() @@ -40,7 +42,13 @@ def test_uniform_sampling(self): assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) def test_normal_sampling(self): - contexts = sample_walker_language_goals(10, normal=True, low=4, high=200) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) assert len(contexts.keys()) == 10 assert "target_distance" in contexts[0].keys() assert "target_direction" in contexts[0].keys() @@ -51,108 +59,125 @@ def test_normal_sampling(self): class TestGoalWrapper(unittest.TestCase): def test_reset(self): - contexts = sample_walker_language_goals(10, low=4, high=200) - env = CARLAnt(contexts=contexts) - wrapped_env = BraxWalkerGoalWrapper(env) - - assert wrapped_env.position is None - state = wrapped_env.reset() - assert state is not None - assert wrapped_env.position is not None - - state, info = wrapped_env.reset(return_info=True) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxAnt(contexts=contexts) + + assert isinstance(env.env, BraxWalkerGoalWrapper) + assert env.position is None + + state, info = env.reset() assert state is not None assert info is not None + assert env.position is not None - env = CARLHalfcheetah(contexts=contexts) - wrapped_env = BraxWalkerGoalWrapper(env) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxHalfcheetah.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxHalfcheetah(contexts=contexts, use_language_goals=True) - assert wrapped_env.position is None - state = wrapped_env.reset() - assert state is not None - assert wrapped_env.position is not None + assert isinstance(env.env, BraxLanguageWrapper) + assert env.position is None - state, info = wrapped_env.reset(return_info=True) + state, info = env.reset() assert state is not None assert info is not None + assert env.position is not None def test_reward_scale(self): - contexts = sample_walker_language_goals(10, low=4, high=200) - env = CARLAnt(contexts=contexts) - wrapped_env = BraxWalkerGoalWrapper(env) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxAnt(contexts=contexts) for _ in range(10): - wrapped_env.reset() + env.reset() for _ in range(10): - action = wrapped_env.action_space.sample() - _, wrapped_reward, _, _ = wrapped_env.step(action) + action = env.action_space.sample() + _, wrapped_reward, _, _, _ = env.step(action) assert wrapped_reward >= 0 - contexts = sample_walker_language_goals(10, low=4, high=200) - env = CARLHalfcheetah(contexts=contexts) - wrapped_env = BraxWalkerGoalWrapper(env) + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxHalfcheetah.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxHalfcheetah(contexts=contexts) for _ in range(10): - wrapped_env.reset() + env.reset() for _ in range(10): - action = wrapped_env.action_space.sample() - _, wrapped_reward, _, _ = wrapped_env.step(action) + action = env.action_space.sample() + _, wrapped_reward, _, _, _ = env.step(action) assert wrapped_reward >= 0 class TestLanguageWrapper(unittest.TestCase): def test_reset(self) -> None: - env = CARLFetch() - wrapped_env = BraxLanguageWrapper(env) - state = wrapped_env.reset() + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxAnt(contexts=contexts, use_language_goals=True) + state, info = env.reset() assert type(state) is dict - assert "env_state" in state.keys() - assert "goal" in state.keys() - assert type(state["goal"]) is str - assert str(wrapped_env.context["target_distance"]) in state["goal"] - assert str(wrapped_env.context["target_radius"]) in state["goal"] - state, info = wrapped_env.reset(return_info=True) + assert "obs" in state.keys() + assert "goal" in state["obs"].keys() + assert type(state["obs"]["goal"]) is str + assert str(env.context["target_distance"]) in state["obs"]["goal"] + assert "north north east" in state["obs"]["goal"] assert info is not None - assert type(state) is dict - - contexts = sample_walker_language_goals(10, low=4, high=200) - env = CARLAnt(contexts=contexts) - wrapped_env = BraxLanguageWrapper(env) - state = wrapped_env.reset() - assert type(state) is dict - assert "env_state" in state.keys() - assert "goal" in state.keys() - assert type(state["goal"]) is str - assert str(wrapped_env.context["target_distance"]) in state["goal"] - assert str(wrapped_env.context["target_direction"]) in state["goal"] - state, info = wrapped_env.reset(return_info=True) - assert info is not None - assert type(state) is dict def test_step(self): - contexts = sample_walker_language_goals(10, low=4, high=200) - env = CARLFetch(contexts=contexts) - wrapped_env = BraxLanguageWrapper(env) - wrapped_env.reset() + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxHalfcheetah.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxHalfcheetah(contexts=contexts, use_language_goals=True) + env.reset() for _ in range(10): - action = wrapped_env.action_space.sample() - state, _, _, _ = wrapped_env.step(action) + action = env.action_space.sample() + state, _, _, _, _ = env.step(action) assert type(state) is dict - assert "env_state" in state.keys() - assert "goal" in state.keys() - assert type(state["goal"]) is str - assert str(wrapped_env.context["target_distance"]) in state["goal"] - assert str(wrapped_env.context["target_radius"]) in state["goal"] - - env = CARLAnt(contexts=contexts) - wrapped_env = BraxLanguageWrapper(env) - wrapped_env.reset() + assert "obs" in state.keys() + assert "goal" in state["obs"].keys() + assert type(state["obs"]["goal"]) is str + assert "north north east" in state["obs"]["goal"] + assert str(env.context["target_distance"]) in state["obs"]["goal"] + + context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_sampler = ContextSampler( + context_distributions=context_distributions, + context_space=CARLBraxAnt.get_context_space(), + seed=0, + ) + contexts = context_sampler.sample_contexts(n_contexts=10) + env = CARLBraxAnt(contexts=contexts) + env.reset() for _ in range(10): - action = wrapped_env.action_space.sample() - state, _, _, _ = wrapped_env.step(action) + action = env.action_space.sample() + state, _, _, _, _ = env.step(action) assert type(state) is dict - assert "env_state" in state.keys() - assert "goal" in state.keys() - assert type(state["goal"]) is str - assert str(wrapped_env.context["target_distance"]) in state["goal"] - assert str(wrapped_env.context["target_direction"]) in state["goal"] + assert "obs" in state.keys() + assert "goal" not in state.keys() From cb522b45fbacaa119f4cebc0c62a957c55599121 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Tue, 9 Jan 2024 15:53:12 +0100 Subject: [PATCH 34/44] formatting --- carl/envs/brax/brax_walker_goal_wrapper.py | 4 +- carl/envs/brax/carl_brax_env.py | 8 ++-- test/test_language_goals.py | 55 +++++++++++++++++----- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index 6158b373..ee912c5f 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -119,14 +119,12 @@ def reset(self, seed=None, options={}): info["success"] = 0 return state, info - def step(self, action): state, _, te, tr, info = self.env.step(action) indices = STATE_INDICES[self.env_name] new_position = ( np.array(list(self.position)) - + np.array([state[indices[0]], state[indices[1]]]) - * self.dt + + np.array([state[indices[0]], state[indices[1]]]) * self.dt ) current_distance_to_goal = np.linalg.norm(self.goal_position - new_position) previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index f3d2337f..8d970a5f 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -280,14 +280,16 @@ def _update_context(self) -> None: self.env.unwrapped.sys = sys - def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]: + def reset( + self, *, seed: int | None = None, options: dict[str, Any] | None = None + ) -> tuple[Any, dict[str, Any]]: """Overwrites reset in super to update context in wrapper.""" last_context_id = self.context_id self._progress_instance() if self.context_id != last_context_id: self._update_context() - #if self.use_language_goals: - #self.env.env.context = self.context + # if self.use_language_goals: + # self.env.env.context = self.context self.env.context = self.context state, info = self.env.reset(seed=seed, options=options) state = self._add_context_to_state(state) diff --git a/test/test_language_goals.py b/test/test_language_goals.py index af0504c1..d22a2382 100644 --- a/test/test_language_goals.py +++ b/test/test_language_goals.py @@ -1,9 +1,15 @@ import unittest -from carl.context.context_space import NormalFloatContextFeature, CategoricalContextFeature +from carl.context.context_space import ( + CategoricalContextFeature, + NormalFloatContextFeature, +) from carl.context.sampler import ContextSampler from carl.envs import CARLBraxAnt, CARLBraxHalfcheetah -from carl.envs.brax.brax_walker_goal_wrapper import BraxLanguageWrapper, BraxWalkerGoalWrapper +from carl.envs.brax.brax_walker_goal_wrapper import ( + BraxLanguageWrapper, + BraxWalkerGoalWrapper, +) DIRECTIONS = [ 1, # north @@ -27,7 +33,10 @@ class TestGoalSampling(unittest.TestCase): def test_uniform_sampling(self): - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), @@ -42,7 +51,10 @@ def test_uniform_sampling(self): assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) def test_normal_sampling(self): - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), @@ -59,7 +71,10 @@ def test_normal_sampling(self): class TestGoalWrapper(unittest.TestCase): def test_reset(self): - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), @@ -76,7 +91,10 @@ def test_reset(self): assert info is not None assert env.position is not None - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxHalfcheetah.get_context_space(), @@ -94,7 +112,10 @@ def test_reset(self): assert env.position is not None def test_reward_scale(self): - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), @@ -110,7 +131,10 @@ def test_reward_scale(self): _, wrapped_reward, _, _, _ = env.step(action) assert wrapped_reward >= 0 - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxHalfcheetah.get_context_space(), @@ -129,7 +153,10 @@ def test_reward_scale(self): class TestLanguageWrapper(unittest.TestCase): def test_reset(self) -> None: - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), @@ -147,7 +174,10 @@ def test_reset(self) -> None: assert info is not None def test_step(self): - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxHalfcheetah.get_context_space(), @@ -166,7 +196,10 @@ def test_step(self): assert "north north east" in state["obs"]["goal"] assert str(env.context["target_distance"]) in state["obs"]["goal"] - context_distributions = [NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), CategoricalContextFeature("target_direction", choices=DIRECTIONS)] + context_distributions = [ + NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), + CategoricalContextFeature("target_direction", choices=DIRECTIONS), + ] context_sampler = ContextSampler( context_distributions=context_distributions, context_space=CARLBraxAnt.get_context_space(), From afbd479a155b0bf6be21c38fd894202593d624ff Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Tue, 9 Jan 2024 16:39:22 +0100 Subject: [PATCH 35/44] fix: gravity limits inverted --- Makefile | 3 +++ carl/envs/__init__.py | 35 ++++++++++++++++++++++------ carl/envs/dmc/carl_dm_finger.py | 2 +- carl/envs/dmc/carl_dm_fish.py | 2 +- carl/envs/dmc/carl_dm_quadruped.py | 2 +- carl/envs/dmc/carl_dm_walker.py | 2 +- carl/envs/dmc/dmc_tasks/utils.py | 6 ++++- carl/envs/gymnasium/__init__.py | 37 ++++++++++++++++++++++++++++++ 8 files changed, 77 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 6c5fce1c..65613bfb 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,9 @@ install-dev: $(PIP) install -e ".[dev, docs]" pre-commit install +install: + $(PIP) install -e . + check-black: $(BLACK) carl test --check || : diff --git a/carl/envs/__init__.py b/carl/envs/__init__.py index b03f906c..0578a7a5 100644 --- a/carl/envs/__init__.py +++ b/carl/envs/__init__.py @@ -4,10 +4,15 @@ import warnings # Classic control is in gym and thus necessary for the base version to run -from carl import envs from carl.envs.gymnasium import * -__all__ = envs.gymnasium.__all__ +__all__ = [ + "CARLAcrobot", + "CARLCartPole", + "CARLMountainCar", + "CARLMountainCarContinuous", + "CARLPendulum", +] def check_spec(spec_name: str) -> bool: @@ -39,28 +44,44 @@ def check_spec(spec_name: str) -> bool: if found: from carl.envs.gymnasium.box2d import * - __all__ += envs.gymnasium.box2d.__all__ + __all__ += ["CARLBipedalWalker", "CARLLunarLander", "CARLVehicleRacing"] found = check_spec("brax") if found: from carl.envs.brax import * - __all__ += envs.brax.__all__ + __all__ += [ + "CARLBraxAnt", + "CARLBraxHalfcheetah", + "CARLBraxHopper", + "CARLBraxHumanoid", + "CARLBraxHumanoidStandup", + "CARLBraxInvertedDoublePendulum", + "CARLBraxInvertedPendulum", + "CARLBraxPusher", + "CARLBraxReacher", + "CARLBraxWalker2d", + ] found = check_spec("py4j") if found: from carl.envs.mario import * - __all__ += envs.mario.__all__ + __all__ += ["CARLMarioEnv"] found = check_spec("dm_control") if found: from carl.envs.dmc import * - __all__ += envs.dmc.__all__ + __all__ += [ + "CARLDmcFingerEnv", + "CARLDmcFishEnv", + "CARLDmcQuadrupedEnv", + "CARLDmcWalkerEnv", + ] found = check_spec("distance") if found: from carl.envs.rna import * - __all__ += envs.rna.__all__ + __all__ += ["CARLRnaDesignEnv"] diff --git a/carl/envs/dmc/carl_dm_finger.py b/carl/envs/dmc/carl_dm_finger.py index 88be38e2..ce1ef575 100644 --- a/carl/envs/dmc/carl_dm_finger.py +++ b/carl/envs/dmc/carl_dm_finger.py @@ -13,7 +13,7 @@ class CARLDmcFingerEnv(CARLDmcEnv): def get_context_features() -> dict[str, ContextFeature]: return { "gravity": UniformFloatContextFeature( - "gravity", lower=-np.inf, upper=-0.1, default_value=-9.81 + "gravity", lower=0.1, upper=np.inf, default_value=9.81 ), "friction_torsional": UniformFloatContextFeature( "friction_torsional", lower=0, upper=np.inf, default_value=1.0 diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 31e077c6..669c88dc 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -13,7 +13,7 @@ class CARLDmcFishEnv(CARLDmcEnv): def get_context_features() -> dict[str, ContextFeature]: return { "gravity": UniformFloatContextFeature( - "gravity", lower=-np.inf, upper=-0.1, default_value=-9.81 + "gravity", lower=0.1, upper=np.inf, default_value=9.81 ), "friction_torsional": UniformFloatContextFeature( "friction_torsional", lower=0, upper=np.inf, default_value=1.0 diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 3e00d5f8..697750d0 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -13,7 +13,7 @@ class CARLDmcQuadrupedEnv(CARLDmcEnv): def get_context_features() -> dict[str, ContextFeature]: return { "gravity": UniformFloatContextFeature( - "gravity", lower=-np.inf, upper=-0.1, default_value=-9.81 + "gravity", lower=0.1, upper=np.inf, default_value=9.81 ), "friction_torsional": UniformFloatContextFeature( "friction_torsional", lower=0, upper=np.inf, default_value=1.0 diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index b16cdd70..9e88e051 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -13,7 +13,7 @@ class CARLDmcWalkerEnv(CARLDmcEnv): def get_context_features() -> dict[str, ContextFeature]: return { "gravity": UniformFloatContextFeature( - "gravity", lower=-np.inf, upper=-0.1, default_value=-9.81 + "gravity", lower=0.1, upper=np.inf, default_value=9.81 ), "friction_torsional": UniformFloatContextFeature( "friction_torsional", lower=0, upper=np.inf, default_value=1.0 diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index 7482dfbe..08abc366 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -122,17 +122,21 @@ def adapt_context(xml_string: bytes, context: Context) -> bytes: # find option settings and override them if they exist, otherwise create new option option = mjcf.find(".//option") + import logging + if option is None: option = etree.Element("option") mjcf.append(option) if "gravity" in context: gravity = option.get("gravity") + logging.info(gravity) if gravity is not None: g = gravity.split(" ") gravity = " ".join([g[0], g[1], str(-context["gravity"])]) else: - gravity = " ".join(["0", "0", str(-context["gravity"])]) + gravity = " ".join(["0", "0", f"{str(-context['gravity'])}"]) + logging.info(gravity) option.set("gravity", gravity) if "wind_x" in context and "wind_y" in context and "wind_z" in context: diff --git a/carl/envs/gymnasium/__init__.py b/carl/envs/gymnasium/__init__.py index 62ba5092..7df661e1 100644 --- a/carl/envs/gymnasium/__init__.py +++ b/carl/envs/gymnasium/__init__.py @@ -1,3 +1,8 @@ +# flake8: noqa: F401 +# Modular imports +import importlib.util as iutil +import warnings + from carl.envs.gymnasium.classic_control import ( CARLAcrobot, CARLCartPole, @@ -13,3 +18,35 @@ "CARLMountainCarContinuous", "CARLPendulum", ] + + +def check_spec(spec_name: str) -> bool: + """Check if the spec is installed + + Parameters + ---------- + spec_name : str + Name of package that is necessary for the environment suite. + + Returns + ------- + bool + Whether the spec was found. + """ + spec = iutil.find_spec(spec_name) + found = spec is not None + if not found: + with warnings.catch_warnings(): + warnings.simplefilter("once") + warnings.warn( + f"Module {spec_name} not found. If you want to use these environments, please follow the installation guide." + ) + return found + + +# Environment loading +found = check_spec("Box2D") +if found: + from carl.envs.gymnasium.box2d import * + + __all__ += ["CARLBipedalWalker", "CARLLunarLander", "CARLVehicleRacing"] From 2d6670bbd65119844854514a8e1da2a5a5581800 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 10:48:36 +0100 Subject: [PATCH 36/44] update pointmass env --- carl/envs/dmc/__init__.py | 2 + carl/envs/dmc/carl_dm_pointmass.py | 172 +++++++++++---------------- carl/envs/dmc/dmc_tasks/pointmass.py | 14 +-- test/test_dmc.py | 130 +++++++++++++++----- 4 files changed, 173 insertions(+), 145 deletions(-) diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 430665fe..1b3d526a 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -2,6 +2,7 @@ # Contexts and bounds by name from carl.envs.dmc.carl_dm_finger import CARLDmcFingerEnv from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv +from carl.envs.dmc.carl_dm_pointmass import CARLDmcPointMassEnv from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv @@ -10,4 +11,5 @@ "CARLDmcFishEnv", "CARLDmcQuadrupedEnv", "CARLDmcWalkerEnv", + "CARLDmcPointMassEnv", ] diff --git a/carl/envs/dmc/carl_dm_pointmass.py b/carl/envs/dmc/carl_dm_pointmass.py index b9d64c2c..dc02d434 100644 --- a/carl/envs/dmc/carl_dm_pointmass.py +++ b/carl/envs/dmc/carl_dm_pointmass.py @@ -1,112 +1,74 @@ -from typing import Dict, List, Optional, Union - import numpy as np -from carl.context.selection import AbstractSelector +from carl.context.context_space import ContextFeature, UniformFloatContextFeature from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv -from carl.envs.dmc.dmc_tasks.fish import STEP_LIMIT # type: ignore -from carl.utils.trial_logger import TrialLogger -from carl.utils.types import Context, Contexts - -DEFAULT_CONTEXT = { - "gravity": -9.81, # Gravity is disabled via flag - "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) - "timestep": 0.004, # Seconds between updates - "joint_damping": 1.0, # Scaling factor for all joints - "joint_stiffness": 0.0, - "actuator_strength": 1, # Scaling factor for all actuators in the model - "density": 5000.0, - "viscosity": 0.0, - "geom_density": 1.0, # No effect, because no gravity - "wind_x": 0.0, - "wind_y": 0.0, - "wind_z": 0.0, - "mass": 0.3, - "starting_x": 0.14, - "starting_y": 0.14, - "target_x": 0.0, - "target_y": 0.0, - "area_size": 0.6, -} - -CONTEXT_BOUNDS = { - "gravity": (-np.inf, -0.1, float), - "friction_tangential": (0, np.inf, float), - "friction_torsional": (0, np.inf, float), - "friction_rolling": (0, np.inf, float), - "timestep": ( - 0.001, - 0.1, - float, - ), - "joint_damping": (0, np.inf, float), - "joint_stiffness": (0, np.inf, float), - "actuator_strength": (0, np.inf, float), - "density": (0, np.inf, float), - "viscosity": (0, np.inf, float), - "geom_density": (0, np.inf, float), - "wind_x": (-np.inf, np.inf, float), - "wind_y": (-np.inf, np.inf, float), - "wind_z": (-np.inf, np.inf, float), - "mass": (0, np.inf, float), - "starting_x": (0, np.inf, float), - "starting_y": (0, np.inf, float), - "target_x": (0, np.inf, float), - "target_y": (0, np.inf, float), - "area_size": (0, np.inf, float), -} - -CONTEXT_MASK = [ - "gravity", - "geom_density", - "wind_x", - "wind_y", - "wind_z", -] class CARLDmcPointMassEnv(CARLDmcEnv): - def __init__( - self, - domain: str = "pointmass", - task: str = "easy_context", - contexts: Contexts = {}, - context_mask: Optional[List[str]] = [], - hide_context: bool = True, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: str = "no", - default_context: Optional[Context] = DEFAULT_CONTEXT, - max_episode_length: int = STEP_LIMIT, - state_context_features: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[ - Union[AbstractSelector, type[AbstractSelector]] - ] = None, - context_selector_kwargs: Optional[Dict] = None, - ): - super().__init__( - domain=domain, - task=task, - contexts=contexts, - context_mask=context_mask, - hide_context=hide_context, - add_gaussian_noise_to_context=add_gaussian_noise_to_context, - gaussian_noise_std_percentage=gaussian_noise_std_percentage, - logger=logger, - scale_context_features=scale_context_features, - default_context=default_context, - max_episode_length=max_episode_length, - state_context_features=state_context_features, - dict_observation_space=dict_observation_space, - context_selector=context_selector, - context_selector_kwargs=context_selector_kwargs, - ) + domain = "pointmass" + task = "easy_pointmass" - def _update_context(self) -> None: - super()._update_context() - self.env.env.task.starting_x = self.context["starting_x"] - self.env.env.task.starting_y = self.context["starting_y"] + @staticmethod + def get_context_features() -> dict[str, ContextFeature]: + return { + "gravity": UniformFloatContextFeature( + "gravity", lower=-np.inf, upper=-0.1, default_value=-9.81 + ), + "friction_torsional": UniformFloatContextFeature( + "friction_torsional", lower=0, upper=np.inf, default_value=1.0 + ), + "friction_rolling": UniformFloatContextFeature( + "friction_rolling", lower=0, upper=np.inf, default_value=1.0 + ), + "friction_tangential": UniformFloatContextFeature( + "friction_tangential", lower=0, upper=np.inf, default_value=1.0 + ), + "timestep": UniformFloatContextFeature( + "timestep", lower=0.001, upper=0.1, default_value=0.004 + ), + "joint_damping": UniformFloatContextFeature( + "joint_damping", lower=0.0, upper=np.inf, default_value=1.0 + ), + "joint_stiffness": UniformFloatContextFeature( + "joint_stiffness", lower=0.0, upper=np.inf, default_value=0.0 + ), + "actuator_strength": UniformFloatContextFeature( + "actuator_strength", lower=0.0, upper=np.inf, default_value=1.0 + ), + "density": UniformFloatContextFeature( + "density", lower=0.0, upper=np.inf, default_value=5000.0 + ), + "viscosity": UniformFloatContextFeature( + "viscosity", lower=0.0, upper=np.inf, default_value=0.0 + ), + "geom_density": UniformFloatContextFeature( + "geom_density", lower=0.0, upper=np.inf, default_value=1.0 + ), + "wind_x": UniformFloatContextFeature( + "wind_x", lower=-np.inf, upper=np.inf, default_value=0.0 + ), + "wind_y": UniformFloatContextFeature( + "wind_y", lower=-np.inf, upper=np.inf, default_value=0.0 + ), + "wind_z": UniformFloatContextFeature( + "wind_z", lower=-np.inf, upper=np.inf, default_value=0.0 + ), + "mass": UniformFloatContextFeature( + "mass", lower=0.0, upper=np.inf, default_value=0.3 + ), + "starting_x": UniformFloatContextFeature( + "starting_x", lower=-np.inf, upper=np.inf, default_value=0.14 + ), + "starting_y": UniformFloatContextFeature( + "starting_y", lower=-np.inf, upper=np.inf, default_value=0.14 + ), + "target_x": UniformFloatContextFeature( + "target_x", lower=-np.inf, upper=np.inf, default_value=0.0 + ), + "target_y": UniformFloatContextFeature( + "target_y", lower=-np.inf, upper=np.inf, default_value=0.0 + ), + "area_size": UniformFloatContextFeature( + "area_size", lower=-np.inf, upper=np.inf, default_value=0.6 + ), + } diff --git a/carl/envs/dmc/dmc_tasks/pointmass.py b/carl/envs/dmc/dmc_tasks/pointmass.py index 83a29bc5..2c076293 100644 --- a/carl/envs/dmc/dmc_tasks/pointmass.py +++ b/carl/envs/dmc/dmc_tasks/pointmass.py @@ -65,7 +65,7 @@ def check_constraints( ) -def get_pointmass_xml_string( +def make_model( mass: float = 0.3, starting_x: float = 0.0, starting_y: float = 0.0, @@ -224,16 +224,16 @@ def randomize_limited_and_rotational_joints(self, physics, random=None): @SUITE.add("benchmarking") # type: ignore[misc] -def easy_context( +def easy_pointmass( context: Context = {}, context_mask: list = [], time_limit: float = _DEFAULT_TIME_LIMIT, random: np.random.RandomState | int | None = None, environment_kwargs: dict | None = None, ) -> control.Environment: - """Returns the Spin task.""" + """No randomization.""" xml_string, assets = get_model_and_assets() - xml_string = get_pointmass_xml_string(**context) + xml_string = make_model(**context) if context != {}: xml_string = adapt_context( xml_string=xml_string, context=context, context_mask=context_mask @@ -250,16 +250,16 @@ def easy_context( @SUITE.add("benchmarking") # type: ignore[misc] -def hard_context( +def hard_pointmass( context: Context = {}, context_mask: list = [], time_limit: float = _DEFAULT_TIME_LIMIT, random: np.random.RandomState | int | None = None, environment_kwargs: dict | None = None, ) -> control.Environment: - """Returns the hard Turn task.""" + """Randomized initializations.""" xml_string, assets = get_model_and_assets() - xml_string = get_pointmass_xml_string(**context) + xml_string = make_model(**context) if context != {}: xml_string = adapt_context( xml_string=xml_string, context=context, context_mask=context_mask diff --git a/test/test_dmc.py b/test/test_dmc.py index 83a9c05c..5556b64c 100644 --- a/test/test_dmc.py +++ b/test/test_dmc.py @@ -1,16 +1,34 @@ -import unittest - +import pytest + +from carl.envs.dmc import ( + CARLDmcFingerEnv, + CARLDmcFishEnv, + CARLDmcPointMassEnv, + CARLDmcQuadrupedEnv, + CARLDmcWalkerEnv, +) +from carl.envs.dmc.dmc_tasks.finger import check_constraints +from carl.envs.dmc.dmc_tasks.finger import ( + get_model_and_assets as get_finger_model_and_assets, +) from carl.envs.dmc.dmc_tasks.finger import ( - check_constraints, spin_context, turn_easy_context, turn_hard_context, ) +from carl.envs.dmc.dmc_tasks.pointmass import ( + check_constraints as check_constraints_pointmass, +) +from carl.envs.dmc.dmc_tasks.pointmass import make_model as make_pointmass_model +from carl.envs.dmc.dmc_tasks.quadruped import make_model as make_quadruped_model from carl.envs.dmc.dmc_tasks.utils import adapt_context +from carl.envs.dmc.dmc_tasks.walker import ( + get_model_and_assets as get_walker_model_and_assets, +) from carl.envs.dmc.loader import load_dmc_env -class TestDMCLoader(unittest.TestCase): +class TestDMCLoader: def test_load_classic_dmc_env(self): _ = load_dmc_env( domain_name="walker", @@ -24,24 +42,24 @@ def test_load_context_dmc_env(self): ) def test_load_unknowntask_dmc_env(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): _ = load_dmc_env( domain_name="walker", task_name="walk_context_blub", ) def test_load_unknowndomain_dmc_env(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): _ = load_dmc_env( domain_name="sdfsdf", task_name="walk", ) -class TestDmcEnvs(unittest.TestCase): +class TestFinger: def test_finger_constraints(self): # Finger can reach spinner? - with self.assertRaises(ValueError): + with pytest.raises(ValueError): check_constraints( limb_length_0=0.17, limb_length_1=0.16, @@ -49,7 +67,7 @@ def test_finger_constraints(self): raise_error=True, ) # Spinner collides with finger hinge? - with self.assertRaises(ValueError): + with pytest.raises(ValueError): check_constraints( limb_length_0=0.17, limb_length_1=0.16, @@ -65,50 +83,40 @@ def test_finger_tasks(self): _ = task(context=context) -class TestDmcUtils(unittest.TestCase): - def setUp(self) -> None: - from carl.envs.dmc.carl_dm_finger import CARLDmcFingerEnv - from carl.envs.dmc.dmc_tasks.finger import get_model_and_assets - - self.xml_string, _ = get_model_and_assets() - self.default_context = CARLDmcFingerEnv.get_default_context() +class TestDmcUtils: + def get_string_and_context(self): + xml_string, _ = get_finger_model_and_assets() + default_context = CARLDmcFingerEnv.get_default_context() + return xml_string, default_context def test_adapt_context_no_context(self): context = {} - _ = adapt_context(xml_string=self.xml_string, context=context) + xml_string, _ = self.get_string_and_context() + _ = adapt_context(xml_string=xml_string, context=context) def test_adapt_context_partialcontext(self): context = {"gravity": 10} - _ = adapt_context(xml_string=self.xml_string, context=context) + xml_string, _ = self.get_string_and_context() + _ = adapt_context(xml_string=xml_string, context=context) def test_adapt_context_fullcontext(self): # only continuous context features - context = self.default_context + xml_string, context = self.get_string_and_context() context["gravity"] *= 1.25 - _ = adapt_context(xml_string=self.xml_string, context=context) + _ = adapt_context(xml_string=xml_string, context=context) def test_adapt_context_friction(self): - from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv - from carl.envs.dmc.dmc_tasks.walker import get_model_and_assets - - xml_string, _ = get_model_and_assets() + xml_string, _ = get_walker_model_and_assets() context = CARLDmcWalkerEnv.get_default_context() context["friction_tangential"] *= 1.3 _ = adapt_context(xml_string=xml_string, context=context) -class TestQuadruped(unittest.TestCase): - def setUp(self) -> None: - pass - +class TestQuadruped: def test_make_model(self): - from carl.envs.dmc.dmc_tasks.quadruped import make_model - - _ = make_model(floor_size=1) + _ = make_quadruped_model(floor_size=1) def test_instantiate_env_with_context(self): - from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv - tasks = ["escape_context", "run_context", "walk_context", "fetch_context"] for task in tasks: _ = CARLDmcQuadrupedEnv( @@ -119,3 +127,59 @@ def test_instantiate_env_with_context(self): }, task=task, ) + + +class TestFish: + def test_make_model(self): + _ = make_quadruped_model(floor_size=1) + + def test_instantiate_env_with_context(self): + tasks = ["swim_context", "upright_context"] + for task in tasks: + _ = CARLDmcFishEnv( + contexts={ + 0: { + "gravity": -10, + } + }, + task=task, + ) + + +class TestPointmass: + def test_make_model(self): + _ = make_pointmass_model(floor_size=1) + + def test_instantiate_env_with_context(self): + tasks = ["easy_pointmass", "hard_pointmass"] + for task in tasks: + _ = CARLDmcPointMassEnv( + contexts={ + 0: { + "starting_x": 0.3, + } + }, + task=task, + ) + + def test_constraints(self): + # Is starting point inside grid? + with pytest.raises(ValueError): + check_constraints_pointmass( + mass=0.3, + starting_x=0.3, + starting_y=0.3, + target_x=0.0, + target_y=0.0, + area_size=0.6, + ) + # Is target inside grid? + with pytest.raises(ValueError): + check_constraints_pointmass( + mass=0.3, + starting_x=0.0, + starting_y=0.0, + target_x=0.3, + target_y=0.3, + area_size=0.6, + ) From 974b9c6fee024e27a8627b75328389fdb340a26f Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 10:57:29 +0100 Subject: [PATCH 37/44] fix: set brax version at 9 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ba5712e8..d6ca10bd 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def read_file(filepath: str) -> str: "gymnasium[box2d]>=0.27.1", ], "brax": [ - "brax>=0.9.1", + "brax==0.9", "protobuf>=3.17.3", ], "dm_control": [ From b0b9da1a5466fb953182db4b9ef7947197e4043a Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 11:02:56 +0100 Subject: [PATCH 38/44] feat: readme badges + even more specific brax version --- README.md | 5 +++++ setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a5ed021..c379ec96 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,11 @@ CARL # – The Benchmark Library +[![PyPI Version](https://img.shields.io/pypi/v/carl-bench.svg)](https://pypi.python.org/pypi/carl-bench) +[![Test](https://github.com/automl/carl/actions/workflows/tests.yaml/badge.svg)](https://github.com/automl/carl/actions/workflows/tests.yaml) +[![Doc Status](https://github.com/automl/carl/actions/workflows/docs.yaml/badge.svg)](https://github.com/automl/carl/actions/workflows/docs.yaml) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + CARL (context adaptive RL) provides highly configurable contextual extensions to several well-known RL environments. It's designed to test your agent's generalization capabilities diff --git a/setup.py b/setup.py index d6ca10bd..1a84d934 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def read_file(filepath: str) -> str: "gymnasium[box2d]>=0.27.1", ], "brax": [ - "brax==0.9", + "brax==0.9.1", "protobuf>=3.17.3", ], "dm_control": [ From a576d1819fdd69da0de6f03dbf643bc36b7710e0 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 11:44:29 +0100 Subject: [PATCH 39/44] fix: tests run again --- carl/envs/brax/brax_walker_goal_wrapper.py | 11 ++- carl/envs/brax/carl_ant.py | 3 + carl/envs/brax/carl_brax_env.py | 15 +++- carl/envs/brax/carl_halfcheetah.py | 3 + carl/envs/brax/carl_hopper.py | 3 + carl/envs/brax/carl_humanoid.py | 3 + carl/envs/brax/carl_pusher.py | 9 +- carl/envs/brax/carl_walker2d.py | 3 + carl/envs/dmc/dmc_tasks/pointmass.py | 8 +- setup.py | 1 + test/test_language_goals.py | 98 +++++++++++++--------- 11 files changed, 107 insertions(+), 50 deletions(-) diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index ee912c5f..efdc9654 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -53,7 +53,7 @@ class BraxWalkerGoalWrapper(gym.Wrapper): """Adds a positional goal to brax walker envs""" - def __init__(self, env, env_name, asset_path) -> None: + def __init__(self, env: gym.Env, env_name: str, asset_path: str) -> None: super().__init__(env) self.env_name = env_name if ( @@ -66,6 +66,7 @@ def __init__(self, env, env_name, asset_path) -> None: self.context = None self.position = None self.goal_position = None + self.goal_radius = None self.direction_values = { 3: [0, -1], 1: [0, 1], @@ -116,6 +117,7 @@ def reset(self, seed=None, options={}): np.array(self.direction_values[self.context["target_direction"]]) * self.context["target_distance"] ) + self.goal_radius = self.context["target_radius"] info["success"] = 0 return state, info @@ -130,7 +132,7 @@ def step(self, action): previous_distance_to_goal = np.linalg.norm(self.goal_position - self.position) direction_reward = max(0, previous_distance_to_goal - current_distance_to_goal) self.position = new_position - if abs(current_distance_to_goal) <= 5: + if abs(current_distance_to_goal) <= self.goal_radius: te = True info["success"] = 1 else: @@ -168,8 +170,11 @@ def step(self, action): def get_goal_desc(self, context): if "target_radius" in context.keys(): target_distance = context["target_distance"] + target_direction = context["target_direction"] target_radius = context["target_radius"] - return f"The distance to the goal is {target_distance}m. Move within {target_radius} steps of the goal." + return f"""The distance to the goal is {target_distance}m + {DIRECTION_NAMES[target_direction]}. + Move within {target_radius} steps of the goal.""" else: target_distance = context["target_distance"] target_direction = context["target_direction"] diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index 68dca775..38711b43 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -43,4 +43,7 @@ def get_context_features() -> dict[str, ContextFeature]: "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 ), + "target_radius": UniformFloatContextFeature( + "target_radius", lower=0.1, upper=np.inf, default_value=5 + ), } diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index 8d970a5f..2c99d3d0 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -214,6 +214,18 @@ def __init__( "target_distance" in contexts[list(contexts.keys())[0]].keys() or "target_direction" in contexts[list(contexts.keys())[0]].keys() ): + assert all( + [ + "target_direction" in contexts[list(contexts.keys())[i]].keys() + for i in range(len(contexts)) + ] + ), "All contexts must have a 'target_direction' key" + assert all( + [ + "target_distance" in contexts[list(contexts.keys())[i]].keys() + for i in range(len(contexts)) + ] + ), "All contexts must have a 'target_distance' key" base_dir = contexts[list(contexts.keys())[0]]["target_direction"] base_dist = contexts[list(contexts.keys())[0]]["target_distance"] max_diff_dir = max( @@ -251,6 +263,7 @@ def _update_context(self) -> None: "elasticity", "target_distance", "target_direction", + "target_radius", ] check_context(context, registered_cfs) @@ -288,8 +301,6 @@ def reset( self._progress_instance() if self.context_id != last_context_id: self._update_context() - # if self.use_language_goals: - # self.env.env.context = self.context self.env.context = self.context state, info = self.env.reset(seed=seed, options=options) state = self._add_context_to_state(state) diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index a4c249a4..b5855cd6 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -61,4 +61,7 @@ def get_context_features() -> dict[str, ContextFeature]: "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 ), + "target_radius": UniformFloatContextFeature( + "target_radius", lower=0.1, upper=np.inf, default_value=5 + ), } diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index 759e08db..d9cdaf1a 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -52,4 +52,7 @@ def get_context_features() -> dict[str, ContextFeature]: "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 ), + "target_radius": UniformFloatContextFeature( + "target_radius", lower=0.1, upper=np.inf, default_value=5 + ), } diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 763c5a08..ad4af4cf 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -79,4 +79,7 @@ def get_context_features() -> dict[str, ContextFeature]: "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 ), + "target_radius": UniformFloatContextFeature( + "target_radius", lower=0.1, upper=np.inf, default_value=5 + ), } diff --git a/carl/envs/brax/carl_pusher.py b/carl/envs/brax/carl_pusher.py index 2c5ec32c..19cdec86 100644 --- a/carl/envs/brax/carl_pusher.py +++ b/carl/envs/brax/carl_pusher.py @@ -1,5 +1,7 @@ from __future__ import annotations +from copy import deepcopy + import numpy as np from carl.context.context_space import ContextFeature, UniformFloatContextFeature @@ -89,8 +91,13 @@ def get_context_features() -> dict[str, ContextFeature]: } def _update_context(self) -> None: - super()._update_context() goal_x = self.context["goal_position_x"] goal_y = self.context["goal_position_y"] goal_z = self.context["goal_position_z"] + context = deepcopy(self.context) + del self.context["goal_position_x"] + del self.context["goal_position_y"] + del self.context["goal_position_z"] + super()._update_context() self.env._goal_pos = np.array([goal_x, goal_y, goal_z]) + self.context = context diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index 6b94b998..db08dbe2 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -61,4 +61,7 @@ def get_context_features() -> dict[str, ContextFeature]: "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 ), + "target_radius": UniformFloatContextFeature( + "target_radius", lower=0.1, upper=np.inf, default_value=5 + ), } diff --git a/carl/envs/dmc/dmc_tasks/pointmass.py b/carl/envs/dmc/dmc_tasks/pointmass.py index 2c076293..1eb80db5 100644 --- a/carl/envs/dmc/dmc_tasks/pointmass.py +++ b/carl/envs/dmc/dmc_tasks/pointmass.py @@ -235,9 +235,7 @@ def easy_pointmass( xml_string, assets = get_model_and_assets() xml_string = make_model(**context) if context != {}: - xml_string = adapt_context( - xml_string=xml_string, context=context, context_mask=context_mask - ) + xml_string = adapt_context(xml_string=xml_string, context=context) physics = Physics.from_xml_string(xml_string, assets) task = ContextualPointMass(randomize_gains=False, random=random) environment_kwargs = environment_kwargs or {} @@ -261,9 +259,7 @@ def hard_pointmass( xml_string, assets = get_model_and_assets() xml_string = make_model(**context) if context != {}: - xml_string = adapt_context( - xml_string=xml_string, context=context, context_mask=context_mask - ) + xml_string = adapt_context(xml_string=xml_string, context=context) physics = Physics.from_xml_string(xml_string, assets) task = ContextualPointMass(randomize_gains=True, random=random) environment_kwargs = environment_kwargs or {} diff --git a/setup.py b/setup.py index 1a84d934..d19673cc 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ def read_file(filepath: str) -> str: "brax": [ "brax==0.9.1", "protobuf>=3.17.3", + "mujoco==3.0.1" ], "dm_control": [ "dm_control>=1.0.3", diff --git a/test/test_language_goals.py b/test/test_language_goals.py index d22a2382..0ad5d09b 100644 --- a/test/test_language_goals.py +++ b/test/test_language_goals.py @@ -44,11 +44,19 @@ def test_uniform_sampling(self): ) contexts = context_sampler.sample_contexts(n_contexts=10) assert len(contexts.keys()) == 10 - assert "target_distance" in contexts[0].keys() - assert "target_direction" in contexts[0].keys() - assert all([contexts[i]["target_direction"] in DIRECTIONS for i in range(10)]) - assert all([contexts[i]["target_distance"] <= 200 for i in range(10)]) - assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) + assert "target_distance" in contexts[0].keys(), "target_distance not in context" + assert ( + "target_direction" in contexts[0].keys() + ), "target_direction not in context" + assert all( + [contexts[i]["target_direction"] in DIRECTIONS for i in range(10)] + ), "Not all directions are valid." + assert all( + [contexts[i]["target_distance"] <= 200 for i in range(10)] + ), "Not all distances are valid (too large)." + assert all( + [contexts[i]["target_distance"] >= 4 for i in range(10)] + ), "Not all distances are valid (too small)." def test_normal_sampling(self): context_distributions = [ @@ -61,12 +69,22 @@ def test_normal_sampling(self): seed=0, ) contexts = context_sampler.sample_contexts(n_contexts=10) - assert len(contexts.keys()) == 10 - assert "target_distance" in contexts[0].keys() - assert "target_direction" in contexts[0].keys() - assert all([contexts[i]["target_direction"] in DIRECTIONS for i in range(10)]) - assert all([contexts[i]["target_distance"] <= 200 for i in range(10)]) - assert all([contexts[i]["target_distance"] >= 4 for i in range(10)]) + assert ( + len(contexts.keys()) == 10 + ), "Number of sampled contexts does not match the requested number." + assert "target_distance" in contexts[0].keys(), "target_distance not in context" + assert ( + "target_direction" in contexts[0].keys() + ), "target_direction not in context" + assert all( + [contexts[i]["target_direction"] in DIRECTIONS for i in range(10)] + ), "Not all directions are valid." + assert all( + [contexts[i]["target_distance"] <= 200 for i in range(10)] + ), "Not all distances are valid (too large)." + assert all( + [contexts[i]["target_distance"] >= 4 for i in range(10)] + ), "Not all distances are valid (too small)." class TestGoalWrapper(unittest.TestCase): @@ -84,12 +102,12 @@ def test_reset(self): env = CARLBraxAnt(contexts=contexts) assert isinstance(env.env, BraxWalkerGoalWrapper) - assert env.position is None + assert env.position is None, "Position set before reset." state, info = env.reset() - assert state is not None - assert info is not None - assert env.position is not None + assert state is not None, "No state returned." + assert info is not None, "No info returned." + assert env.position is not None, "Position not set." context_distributions = [ NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), @@ -103,13 +121,13 @@ def test_reset(self): contexts = context_sampler.sample_contexts(n_contexts=10) env = CARLBraxHalfcheetah(contexts=contexts, use_language_goals=True) - assert isinstance(env.env, BraxLanguageWrapper) - assert env.position is None + assert isinstance(env.env, BraxLanguageWrapper), "Language wrapper not used." + assert env.position is None, "Position set before reset." state, info = env.reset() - assert state is not None - assert info is not None - assert env.position is not None + assert state is not None, "No state returned." + assert info is not None, "No info returned." + assert env.position is not None, "Position not set." def test_reward_scale(self): context_distributions = [ @@ -129,7 +147,7 @@ def test_reward_scale(self): for _ in range(10): action = env.action_space.sample() _, wrapped_reward, _, _, _ = env.step(action) - assert wrapped_reward >= 0 + assert wrapped_reward >= 0, "Negative reward." context_distributions = [ NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), @@ -148,7 +166,7 @@ def test_reward_scale(self): for _ in range(10): action = env.action_space.sample() _, wrapped_reward, _, _, _ = env.step(action) - assert wrapped_reward >= 0 + assert wrapped_reward >= 0, "Negative reward." class TestLanguageWrapper(unittest.TestCase): @@ -165,13 +183,15 @@ def test_reset(self) -> None: contexts = context_sampler.sample_contexts(n_contexts=10) env = CARLBraxAnt(contexts=contexts, use_language_goals=True) state, info = env.reset() - assert type(state) is dict - assert "obs" in state.keys() - assert "goal" in state["obs"].keys() - assert type(state["obs"]["goal"]) is str - assert str(env.context["target_distance"]) in state["obs"]["goal"] - assert "north north east" in state["obs"]["goal"] - assert info is not None + assert type(state) is dict, "State is not a dictionary." + assert "obs" in state.keys(), "Observation not in state." + assert "goal" in state["obs"].keys(), "Goal not in observation." + assert type(state["obs"]["goal"]) is str, "Goal is not a string." + assert ( + str(env.context["target_distance"]) in state["obs"]["goal"] + ), "Distance not in goal." + assert "north north east" in state["obs"]["goal"], "Direction not in goal." + assert info is not None, "No info returned." def test_step(self): context_distributions = [ @@ -189,12 +209,14 @@ def test_step(self): for _ in range(10): action = env.action_space.sample() state, _, _, _, _ = env.step(action) - assert type(state) is dict - assert "obs" in state.keys() - assert "goal" in state["obs"].keys() - assert type(state["obs"]["goal"]) is str - assert "north north east" in state["obs"]["goal"] - assert str(env.context["target_distance"]) in state["obs"]["goal"] + assert type(state) is dict, "State is not a dictionary." + assert "obs" in state.keys(), "Observation not in state." + assert "goal" in state["obs"].keys(), "Goal not in observation." + assert type(state["obs"]["goal"]) is str, "Goal is not a string." + assert "north north east" in state["obs"]["goal"], "Direction not in goal." + assert ( + str(env.context["target_distance"]) in state["obs"]["goal"] + ), "Distance not in goal." context_distributions = [ NormalFloatContextFeature("target_distance", mu=9.8, sigma=1), @@ -211,6 +233,6 @@ def test_step(self): for _ in range(10): action = env.action_space.sample() state, _, _, _, _ = env.step(action) - assert type(state) is dict - assert "obs" in state.keys() - assert "goal" not in state.keys() + assert type(state) is dict, "State is not a dictionary." + assert "obs" in state.keys(), "Observation not in state." + assert "goal" not in state.keys(), "Goal in observation." From 7ae0a52eaeaf9df36b71ad19fc652b003576b2d7 Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 11:53:22 +0100 Subject: [PATCH 40/44] fix: change default target distance --- carl/envs/brax/carl_ant.py | 2 +- carl/envs/brax/carl_halfcheetah.py | 2 +- carl/envs/brax/carl_hopper.py | 2 +- carl/envs/brax/carl_humanoid.py | 2 +- carl/envs/brax/carl_walker2d.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index 38711b43..ee181ecc 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -38,7 +38,7 @@ def get_context_features() -> dict[str, ContextFeature]: "viscosity", lower=0, upper=np.inf, default_value=0 ), "target_distance": UniformFloatContextFeature( - "target_distance", lower=0, upper=np.inf, default_value=0 + "target_distance", lower=0, upper=np.inf, default_value=100 ), "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index b5855cd6..97c8d7ab 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -56,7 +56,7 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_ffoot", lower=1e-6, upper=np.inf, default_value=0.8845188 ), "target_distance": UniformFloatContextFeature( - "target_distance", lower=0, upper=np.inf, default_value=0 + "target_distance", lower=0, upper=np.inf, default_value=100 ), "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 diff --git a/carl/envs/brax/carl_hopper.py b/carl/envs/brax/carl_hopper.py index d9cdaf1a..1b042e1b 100644 --- a/carl/envs/brax/carl_hopper.py +++ b/carl/envs/brax/carl_hopper.py @@ -47,7 +47,7 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_foot", lower=1e-6, upper=np.inf, default_value=5.3155746 ), "target_distance": UniformFloatContextFeature( - "target_distance", lower=0, upper=np.inf, default_value=0 + "target_distance", lower=0, upper=np.inf, default_value=100 ), "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index ad4af4cf..4ddbe3b0 100644 --- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -74,7 +74,7 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_left_lower_arm", lower=1e-6, upper=np.inf, default_value=1.2295402 ), "target_distance": UniformFloatContextFeature( - "target_distance", lower=0, upper=np.inf, default_value=0 + "target_distance", lower=0, upper=np.inf, default_value=100 ), "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 diff --git a/carl/envs/brax/carl_walker2d.py b/carl/envs/brax/carl_walker2d.py index db08dbe2..8d927da0 100644 --- a/carl/envs/brax/carl_walker2d.py +++ b/carl/envs/brax/carl_walker2d.py @@ -56,7 +56,7 @@ def get_context_features() -> dict[str, ContextFeature]: "mass_foot_left", lower=1e-6, upper=np.inf, default_value=3.1667254 ), "target_distance": UniformFloatContextFeature( - "target_distance", lower=0, upper=np.inf, default_value=0 + "target_distance", lower=0, upper=np.inf, default_value=100 ), "target_direction": CategoricalContextFeature( "target_direction", choices=directions, default_value=1 From d12306f872ea749e6cb1676cc84c610d018a4bda Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 11:59:30 +0100 Subject: [PATCH 41/44] fix: make sure goal brax is not default --- carl/envs/brax/brax_walker_goal_wrapper.py | 1 - carl/envs/brax/carl_brax_env.py | 32 +++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/carl/envs/brax/brax_walker_goal_wrapper.py b/carl/envs/brax/brax_walker_goal_wrapper.py index efdc9654..9c7a49c8 100644 --- a/carl/envs/brax/brax_walker_goal_wrapper.py +++ b/carl/envs/brax/brax_walker_goal_wrapper.py @@ -148,7 +148,6 @@ def __init__(self, env) -> None: self.context = None def reset(self, seed=None, options={}): - print(self.context) self.env.context = self.context state, info = self.env.reset(seed=seed, options=options) goal_str = self.get_goal_desc(self.context) diff --git a/carl/envs/brax/carl_brax_env.py b/carl/envs/brax/carl_brax_env.py index 2c99d3d0..3b774c6e 100644 --- a/carl/envs/brax/carl_brax_env.py +++ b/carl/envs/brax/carl_brax_env.py @@ -19,7 +19,7 @@ ) from carl.envs.brax.wrappers import GymWrapper, VectorGymWrapper from carl.envs.carl_env import CARLEnv -from carl.utils.types import Contexts +from carl.utils.types import Context, Contexts def set_geom_attr( @@ -306,3 +306,33 @@ def reset( state = self._add_context_to_state(state) info["context_id"] = self.context_id return state, info + + @classmethod + def get_default_context(cls) -> Context: + """Get the default context (without any goal features) + + Returns + ------- + Context + Default context. + """ + default_context = cls.get_context_space().get_default_context() + if "target_distance" in default_context: + del default_context["target_distance"] + if "target_direction" in default_context: + del default_context["target_direction"] + if "target_radius" in default_context: + del default_context["target_radius"] + return default_context + + @classmethod + def get_default_goal_context(cls) -> Context: + """Get the default context (with goal features) + + Returns + ------- + Context + Default context. + """ + default_context = cls.get_context_space().get_default_context() + return default_context From 0a7e4c9c6ce76c7eacc8850d0eef376646bf609e Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 12:32:01 +0100 Subject: [PATCH 42/44] next brax version try --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d19673cc..fd711596 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def read_file(filepath: str) -> str: "gymnasium[box2d]>=0.27.1", ], "brax": [ - "brax==0.9.1", + "brax==0.9.3", "protobuf>=3.17.3", "mujoco==3.0.1" ], From 193d512e5912e3a2ba76ad0fa932facf5adbcd65 Mon Sep 17 00:00:00 2001 From: benjamc Date: Fri, 9 Feb 2024 12:51:34 +0100 Subject: [PATCH 43/44] test: fix search space encoding (test), handle more cases --- carl/context/search_space_encoding.py | 8 +++++--- test/test_search_space_encoding.py | 18 +++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/carl/context/search_space_encoding.py b/carl/context/search_space_encoding.py index 893a71d4..ae955d3a 100644 --- a/carl/context/search_space_encoding.py +++ b/carl/context/search_space_encoding.py @@ -106,11 +106,11 @@ def search_space_to_config_space( ------- ConfigurationSpace """ - if type(search_space) == str: + if isinstance(search_space, str): with open(search_space, "r") as f: jason_string = f.read() cs = csjson.read(jason_string) - elif type(search_space) == DictConfig: + elif isinstance(search_space, DictConfig): # reorder hyperparameters as List[Dict] hyperparameters = [] for name, cfg in search_space.hyperparameters.items(): @@ -130,8 +130,10 @@ def search_space_to_config_space( jason_string = json.dumps(search_space, cls=JSONCfgEncoder) cs = csjson.read(jason_string) - elif type(search_space) == ConfigurationSpace: + elif isinstance(search_space, ConfigurationSpace): cs = search_space + elif isinstance(search_space, dict): + cs = csjson.read(json.dumps(search_space)) else: raise ValueError( f"search_space must be of type str or DictConfig. Got {type(search_space)}." diff --git a/test/test_search_space_encoding.py b/test/test_search_space_encoding.py index 42a0489d..2c1a4e84 100644 --- a/test/test_search_space_encoding.py +++ b/test/test_search_space_encoding.py @@ -21,6 +21,7 @@ "lower": -512.0, "upper": 512.0, "default": -3.0, + "q": None, }, { "name": "x1", @@ -29,6 +30,7 @@ "lower": -512.0, "upper": 512.0, "default": -4.0, + "q": None, }, ], "conditions": [], @@ -51,19 +53,14 @@ def setUp(self): self.test_space = ConfigurationSpace(name="myspace", space=dict_space) return super().setUp() - def test_init(self): - self.test_space = ConfigurationSpace(name="myspace", space=dict_space_2) - - self.test_space = ConfigurationSpace(name="myspace", space=str_space) - - def test_config_spaces(self): + def test_ss_as_cs(self): try: search_space_to_config_space(self.test_space) except Exception as e: print(f"Cannot encode search space -- {self.test_space}.") raise e - def test_dict_configs(self): + def test_ss_as_dictconfig(self): try: dict_space = DictConfig({"hyperparameters": {}}) @@ -72,6 +69,13 @@ def test_dict_configs(self): print(f"Cannot encode search space -- {dict_space}.") raise e + def test_ss_as_dict(self): + try: + search_space_to_config_space(dict_space_2) + except Exception as e: + print(f"Cannot encode search space -- {dict_space_2}.") + raise e + if __name__ == "__main__": unittest.main() From 0e00e41115b484d84468539f60b7f8e17eca128a Mon Sep 17 00:00:00 2001 From: Theresa Eimer Date: Fri, 9 Feb 2024 13:06:39 +0100 Subject: [PATCH 44/44] version update --- carl/__init__.py | 2 +- changelog.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/carl/__init__.py b/carl/__init__.py index fa63768c..a7bb344f 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -1,5 +1,5 @@ __license__ = "Apache-2.0 License" -__version__ = "1.0.0" +__version__ = "1.1.0" __author__ = "Carolin Benjamins, Theresa Eimer, Frederik Schubert, André Biedenkapp, Aditya Mohan, Sebastian Döhler" diff --git a/changelog.md b/changelog.md index 76d89dae..d8e60288 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +# 1.1.0 +- increased test coverage +- smaller bug fixes +- added DMC pointmass env +- added goal & language goal options for Brax + # 1.0.0 Major overhaul of the CARL environment - Contexts are stored in each environment's class