automl · amsks · Nov 13, 2022 · Nov 13, 2022 · Nov 13, 2022 · Nov 13, 2022
diff --git a/.gitignore b/.gitignore
@@ -8,7 +8,7 @@
 /dataSources.local.xml
 /httpRequests/
 tmp/
-slurm.sh
+slurm.sh
 carl/runscripts/generated
 docs/html
 docs/apidoc
@@ -20,12 +20,19 @@ carl.egg-info
 exp_sweep
 multirun
 outputs
+testvenv
+*.egg-info
 runs
-*.tex
 *.png
 *.pdf
 *.csv
-*.json
 *.pickle
-*.egg-info
-*code-workspace
+*.ipynb_checkpoints
+*optgap*
+*smac3*
+*.json
+generated
+core
+*.tex
+build
+target
diff --git a/.gitmodules b/.gitmodules
@@ -6,4 +6,4 @@
 	url = https://github.com/Mawiszus/TOAD-GUI
 [submodule "src/envs/mario/Mario-AI-Framework"]
 	path = src/envs/mario/Mario-AI-Framework
-	url = https://github.com/frederikschubert/Mario-AI-Framework
+	url = https://github.com/frederikschubert/Mario-AI-Framework
diff --git a/CITATION.bib b/CITATION.bib
@@ -11,4 +11,4 @@ @inproceedings { BenEim2023a
   title        = {Contextualize Me - The Case for Context in Reinforcement Learning},
   journal      = {Transactions on Machine Learning Research},
   year         = {2023},
-}
+}
diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ pip install .
 
 This will only install the basic classic control environments, which should run on most operating systems. For the full set of environments, use the install options:
 ```bash
-pip install -e .[box2d, brax, mario, dm_control]
+pip install -e .[box2d,brax,mario,dm_control]
 ```
 
 These may not be compatible with Windows systems. Box2D environment may need to be installed via conda on MacOS systems:
@@ -68,17 +68,15 @@ Different instiations can be achieved by setting the context features to differe
 ## Cite Us
 If you use CARL in your research, please cite our paper on the benchmark:
 ```bibtex
-@inproceedings{BenEim2021a,
-    title     = {CARL: A Benchmark for Contextual and Adaptive Reinforcement Learning},
-    author    = {Carolin Benjamins and Theresa Eimer and Frederik Schubert and André Biedenkapp and Bodo Rosenhahn and Frank Hutter and Marius Lindauer},
-    booktitle = {NeurIPS 2021 Workshop on Ecological Theory of Reinforcement Learning},
-    year      = {2021},
-    month     = dec
+@inproceedings{Benjamins2023,
+    title     = {Contextualize Me -- The Case for Context in Reinforcement Learning},
+    author    = {Carolin Benjamins and Theresa Eimer and Frederik Schubert and Aditya Mohan and Sebastian Döhler and André Biedenkapp and Bodo Rosenhahn and Frank Hutter and Marius Lindauer},
+    booktitle = {Transactions on Machine Learning Research},
+    year      = {2023},
+    month     = Apr
 }
 ```
 
-You can find the code and experiments for this paper in the `neurips_ecorl_workshop_2021` branch.
-
 ## References
 [OpenAI gym, Brockman et al., 2016. arXiv preprint arXiv:1606.01540](https://arxiv.org/pdf/1606.01540.pdf)
 

diff --git a/carl/context/sampling.py b/carl/context/sampling.py
@@ -1,10 +1,11 @@
 # flake8: noqa: W605
+from __future__ import annotations
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import importlib
 
 import numpy as np
-from scipy.stats import norm, rv_continuous
+from scipy.stats import norm, rv_continuous, uniform
 
 import carl.envs
 from carl.utils.types import Context, Contexts
@@ -59,6 +60,8 @@ def sample_contexts(
     default_sample_std_percentage: float = 0.05,
     fallback_sample_std: float = 0.1,
     seed: Optional[int] = None,
+    uniform_distribution: bool = False,
+    uniform_bounds_rel: Optional[Tuple[float, float]] = None,  # relative (low, high) bounds; must be set if uniform_distribution=True
 ) -> Dict[int, Dict[str, Any]]:
     """
     Sample contexts.
@@ -158,7 +161,21 @@ def sample_contexts(
                 # the sample mean. Therefore we use a fallback sample standard deviation.
                 sample_std = fallback_sample_std  # TODO change this back to sample_std
 
-            random_variable = norm(loc=sample_mean, scale=sample_std)
+            if not uniform_distribution:
+                random_variable = norm(loc=sample_mean, scale=sample_std)
+            else:
+                if uniform_bounds_rel is None:  # the default; fail with a clear message instead of a TypeError
+                    raise ValueError("uniform_bounds_rel must be given when uniform_distribution=True")
+                rel_low, rel_high = uniform_bounds_rel[0], uniform_bounds_rel[1]
+                # bounds defined as [loc, loc+scale]
+                if sample_mean == 0:
+                    # relative bounds are centered around 1 so subtract here for the percentages
+                    loc, scale = rel_low - 1, rel_high - rel_low
+                elif sample_mean < 0:  # scaling by a negative mean swaps which bound is the lower one
+                    loc, scale = rel_high * sample_mean, rel_low * sample_mean - rel_high * sample_mean
+                else:
+                    loc, scale = rel_low * sample_mean, rel_high * sample_mean - rel_low * sample_mean
+                random_variable = uniform(loc=loc, scale=scale)
             context_feature_type = env_bounds[context_feature_name][2]
             sample_dists[context_feature_name] = (random_variable, context_feature_type)
 
@@ -173,6 +190,7 @@ def sample_contexts(
                 random_variable = sample_dists[k][0]
                 context_feature_type = sample_dists[k][1]
                 lower_bound, upper_bound = env_bounds[k][0], env_bounds[k][1]
+                assert lower_bound <= upper_bound, f"context variable {k}: lower bound [{lower_bound}] is higher than upper bound [{upper_bound}]!"
                 if context_feature_type == list:
                     length = rng.integers(
                         5e5

diff --git a/carl/context/selection.py b/carl/context/selection.py
@@ -88,7 +88,7 @@ def context_key(self) -> Any | None:
         Any | None
             The key of the current context or None
         """
-        if self.context_id:
+        if self.context_id is not None:
             key = self.contexts_keys[self.context_id]
         else:
             key = None

diff --git a/carl/envs/box2d/__init__.py b/carl/envs/box2d/__init__.py
@@ -1,22 +1,42 @@
 # flake8: noqa: F401
-from carl.envs.box2d.carl_bipedal_walker import (
-    CONTEXT_BOUNDS as CARLBipedalWalkerEnv_bounds,
-)
-from carl.envs.box2d.carl_bipedal_walker import (
-    DEFAULT_CONTEXT as CARLBipedalWalkerEnv_defaults,
-)
-from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv
 
 # Contextenvs.s and bounds by name
-from carl.envs.box2d.carl_lunarlander import CONTEXT_BOUNDS as CARLLunarLanderEnv_bounds
+from functools import partial
+import warnings
+
+import gym
+from carl.envs.box2d.carl_lunarlander import CARLLunarLanderEnv
 from carl.envs.box2d.carl_lunarlander import (
     DEFAULT_CONTEXT as CARLLunarLanderEnv_defaults,
 )
-from carl.envs.box2d.carl_lunarlander import CARLLunarLanderEnv
 from carl.envs.box2d.carl_vehicle_racing import (
     CONTEXT_BOUNDS as CARLVehicleRacingEnv_bounds,
 )
+
+from carl.envs.box2d.carl_vehicle_racing import CARLVehicleRacingEnv
 from carl.envs.box2d.carl_vehicle_racing import (
     DEFAULT_CONTEXT as CARLVehicleRacingEnv_defaults,
 )
-from carl.envs.box2d.carl_vehicle_racing import CARLVehicleRacingEnv
+from carl.envs.box2d.carl_lunarlander import (
+    CONTEXT_BOUNDS as CARLLunarLanderEnv_bounds,
+)
+
+from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv
+from carl.envs.box2d.carl_bipedal_walker import (
+    DEFAULT_CONTEXT as CARLBipedalWalkerEnv_defaults,
+)
+from carl.envs.box2d.carl_bipedal_walker import (
+    CONTEXT_BOUNDS as CARLBipedalWalkerEnv_bounds,
+)
+
+try:
+    from carl.envs.box2d.carl_bipedal_walker import CARLBipedalWalkerEnv
+    from gym.envs.registration import register
+
+    def make_env(**kwargs):
+        return CARLBipedalWalkerEnv(**kwargs)
+    register("CARLBipedalWalkerEnv-v0", entry_point=make_env)
+    # NOTE(review): gym.make() runs at import time and this one env object is bound into the partial.
+    register("CARLBipedalWalkerHardcoreEnv-v0", entry_point=partial(make_env, env=gym.make("BipedalWalkerHardcore-v3")))
+except Exception as e:
+    warnings.warn(f"Could not load CARLBipedalWalkerEnv which is probably not installed ({e}).")
diff --git a/carl/envs/box2d/carl_bipedal_walker.py b/carl/envs/box2d/carl_bipedal_walker.py
@@ -105,7 +105,8 @@ def __init__(
         instance_mode: str, optional
         """
         if env is None:
-            env = bipedal_walker.BipedalWalker()
+            # env = bipedal_walker.BipedalWalker()
+            env = gym.make(id="BipedalWalker-v3")
         if not contexts:
             contexts = {0: DEFAULT_CONTEXT}
         super().__init__(

diff --git a/carl/envs/brax/__init__.py b/carl/envs/brax/__init__.py
@@ -1,20 +1,23 @@
 # flake8: noqa: F401
 # Contexts and bounds by name
-from carl.envs.brax.carl_ant import CONTEXT_BOUNDS as CARLAnt_bounds
-from carl.envs.brax.carl_ant import DEFAULT_CONTEXT as CARLAnt_defaults
-from carl.envs.brax.carl_ant import CARLAnt
-from carl.envs.brax.carl_fetch import CONTEXT_BOUNDS as CARLFetch_bounds
-from carl.envs.brax.carl_fetch import DEFAULT_CONTEXT as CARLFetch_defaults
-from carl.envs.brax.carl_fetch import CARLFetch
-from carl.envs.brax.carl_grasp import CONTEXT_BOUNDS as CARLGrasp_bounds
-from carl.envs.brax.carl_grasp import DEFAULT_CONTEXT as CARLGrasp_defaults
-from carl.envs.brax.carl_grasp import CARLGrasp
-from carl.envs.brax.carl_halfcheetah import CONTEXT_BOUNDS as CARLHalfcheetah_bounds
-from carl.envs.brax.carl_halfcheetah import DEFAULT_CONTEXT as CARLHalfcheetah_defaults
-from carl.envs.brax.carl_halfcheetah import CARLHalfcheetah
-from carl.envs.brax.carl_humanoid import CONTEXT_BOUNDS as CARLHumanoid_bounds
-from carl.envs.brax.carl_humanoid import DEFAULT_CONTEXT as CARLHumanoid_defaults
-from carl.envs.brax.carl_humanoid import CARLHumanoid
-from carl.envs.brax.carl_ur5e import CONTEXT_BOUNDS as CARLUr5e_bounds
-from carl.envs.brax.carl_ur5e import DEFAULT_CONTEXT as CARLUr5e_defaults
-from carl.envs.brax.carl_ur5e import CARLUr5e
+from carl.envs.braxenvs.carl_ant import CONTEXT_BOUNDS as CARLAnt_bounds
+from carl.envs.braxenvs.carl_ant import DEFAULT_CONTEXT as CARLAnt_defaults
+from carl.envs.braxenvs.carl_ant import CARLAnt
+from carl.envs.braxenvs.carl_halfcheetah import CONTEXT_BOUNDS as CARLHalfcheetah_bounds
+from carl.envs.braxenvs.carl_halfcheetah import DEFAULT_CONTEXT as CARLHalfcheetah_defaults
+from carl.envs.braxenvs.carl_halfcheetah import CARLHalfcheetah
+from carl.envs.braxenvs.carl_humanoid import CONTEXT_BOUNDS as CARLHumanoid_bounds
+from carl.envs.braxenvs.carl_humanoid import DEFAULT_CONTEXT as CARLHumanoid_defaults
+from carl.envs.braxenvs.carl_humanoid import CARLHumanoid
+from carl.envs.braxenvs.carl_hopper import CONTEXT_BOUNDS as CARLHopper_bounds
+from carl.envs.braxenvs.carl_hopper import DEFAULT_CONTEXT as CARLHopper_defaults
+from carl.envs.braxenvs.carl_hopper import CARLHopper
+from carl.envs.braxenvs.carl_reacher import CONTEXT_BOUNDS as CARLReacher_bounds
+from carl.envs.braxenvs.carl_reacher import DEFAULT_CONTEXT as CARLReacher_defaults
+from carl.envs.braxenvs.carl_reacher import CARLReacher
+from carl.envs.braxenvs.carl_pusher import CONTEXT_BOUNDS as CARLPusher_bounds
+from carl.envs.braxenvs.carl_pusher import DEFAULT_CONTEXT as CARLPusher_defaults
+from carl.envs.braxenvs.carl_pusher import CARLPusher
+from carl.envs.braxenvs.carl_double_pendulum import CONTEXT_BOUNDS as CARLInvertedDoublePendulum_bounds
+from carl.envs.braxenvs.carl_double_pendulum import DEFAULT_CONTEXT as CARLInvertedDoublePendulum_defaults
+from carl.envs.braxenvs.carl_double_pendulum import CARLInvertedDoublePendulum
diff --git a/carl/envs/brax/brax_wrappers.py b/carl/envs/brax/brax_wrappers.py
@@ -0,0 +1,148 @@
+# Copyright 2023 The Brax Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Wrappers to convert brax envs to gym envs."""
+from typing import ClassVar, Optional
+
+from brax.envs import Env
+import gym
+from gym import spaces
+from gym.vector import utils
+import jax
+import numpy as np
+from functools import partial
+
+
+class GymWrapper(gym.Env):
+  """A wrapper that converts Brax Env to one that follows Gym API."""
+
+  # Flag that prevents `gym.register` from misinterpreting the `_step` and
+  # `_reset` as signs of a deprecated gym Env API.
+  _gym_disable_underscore_compat: ClassVar[bool] = True
+
+  def __init__(self,
+               env: Env,
+               seed: int = 0,
+               backend: Optional[str] = None):
+    self._env = env
+    self.metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 1 / self._env.dt
+    }
+    self.seed(seed)  # creates self._key, the PRNG key consumed by reset()
+    self.backend = backend  # stored only; nothing in this class reads it
+    self._state = None  # stays None until the first reset()
+
+    obs = np.inf * np.ones(self._env.observation_size, dtype='float32')
+    self.observation_space = spaces.Box(-obs, obs, dtype='float32')  # unbounded in every dimension
+
+    action = np.ones(self._env.action_size, dtype='float32')
+    self.action_space = spaces.Box(-action, action, dtype='float32')  # each component in [-1, 1]
+
+    def reset(key):  # helper: takes a PRNG key, returns (state, obs, next key)
+      key1, key2 = jax.random.split(key)  # key2 seeds this reset, key1 is handed back for the next one
+      state = self._env.reset(key2)
+      return state, state.obs, key1
+
+    self._reset = partial(reset)  # NOTE(review): partial() binds no arguments here, so this only forwards to reset — confirm intent
+
+    def step(state, action):  # helper: advances the wrapped env by one step
+      state = self._env.step(state, action)
+      info = {**state.metrics, **state.info}  # on duplicate keys the state.info value wins
+      return state, state.obs, state.reward, state.done, info
+
+    self._step = partial(step)  # NOTE(review): same argument-less partial as for _reset
+
+  def reset(self):  # resets the wrapped env and returns only the observation
+    self._state, obs, self._key = self._reset(self._key)
+    # We return device arrays for pytorch users.
+    return obs
+
+  def step(self, action):  # returns the 4-tuple (obs, reward, done, info)
+    self._state, obs, reward, done, info = self._step(self._state, action)
+    # We return device arrays for pytorch users.
+    return obs, reward, done, info
+
+  def seed(self, seed: int = 0):  # replaces the PRNG key used by subsequent resets
+    self._key = jax.random.PRNGKey(seed)
+
+  def render(self, mode='human'):  # defers to gym.Env.render for every mode
+    return super().render(mode=mode)  # just raise an exception
+
+
+class VectorGymWrapper(gym.vector.VectorEnv):
+  """A wrapper that converts batched Brax Env to one that follows Gym VectorEnv API."""
+
+  # Flag that prevents `gym.register` from misinterpreting the `_step` and
+  # `_reset` as signs of a deprecated gym Env API.
+  _gym_disable_underscore_compat: ClassVar[bool] = True
+
+  def __init__(self,
+               env: Env,
+               seed: int = 0,
+               backend: Optional[str] = None):
+    self._env = env
+    self.metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 1 / self._env.dt
+    }
+    if not hasattr(self._env, 'batch_size'):
+      raise ValueError('underlying env must be batched')
+
+    self.num_envs = self._env.batch_size
+    self.seed(seed)  # creates self._key, the PRNG key consumed by reset()
+    self.backend = backend  # stored only; nothing in this class reads it
+    self._state = None  # stays None until the first reset()
+
+    obs = np.inf * np.ones(self._env.observation_size, dtype='float32')
+    obs_space = spaces.Box(-obs, obs, dtype='float32')  # single-env space, unbounded
+    self.observation_space = utils.batch_space(obs_space, self.num_envs)
+
+    action = np.ones(self._env.action_size, dtype='float32')
+    action_space = spaces.Box(-action, action, dtype='float32')  # single-env space, each component in [-1, 1]
+    self.action_space = utils.batch_space(action_space, self.num_envs)
+
+    def reset(key):  # helper: takes a PRNG key, returns (state, obs, next key)
+      key1, key2 = jax.random.split(key)  # key2 seeds this reset, key1 is handed back for the next one
+      state = self._env.reset(key2)
+      return state, state.obs, key1
+
+    self._reset = partial(reset)  # NOTE(review): partial() binds no arguments here, so this only forwards to reset — confirm intent
+
+    def step(state, action):  # helper: advances the wrapped env by one step
+      state = self._env.step(state, action)
+      info = {**state.metrics, **state.info}  # on duplicate keys the state.info value wins
+      return state, state.obs, state.reward, state.done, info
+
+    self._step = partial(step)  # NOTE(review): same argument-less partial as for _reset
+
+  def reset(self):  # resets the wrapped env and returns only the observation
+    self._state, obs, self._key = self._reset(self._key)
+    return obs
+
+  def step(self, action):  # returns the 4-tuple (obs, reward, done, info)
+    self._state, obs, reward, done, info = self._step(self._state, action)
+    return obs, reward, done, info
+
+  def seed(self, seed: int = 0):  # replaces the PRNG key used by subsequent resets
+    self._key = jax.random.PRNGKey(seed)
+
+  def render(self, mode='human'):  # only 'rgb_array' is handled here
+    if mode == 'rgb_array':
+      from brax.io import image  # local import: this module never imports `image` at top level
+      sys, state = self._env.sys, self._state
+      if state is None:
+        raise RuntimeError('must call reset or step before rendering')
+      return image.render_array(sys, state.state.take(0), 256, 256)  # take(0): presumably the first env of the batch
+    return super().render(mode=mode)  # just raise an exception