HumanCompatibleAI · itwasabhi · Dec 16, 2024
diff --git a/setup.py b/setup.py
@@ -42,7 +42,7 @@
         "numpy",
         "scipy",
         "tqdm",
-        "gym",
+        "gymnasium",
         "pettingzoo",
         "ipython",
         "pygame",

diff --git a/src/human_aware_rl/imitation/behavior_cloning_tf2.py b/src/human_aware_rl/imitation/behavior_cloning_tf2.py
@@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config):
         """
         RLLib compatible constructor for initializing a behavior cloning model
 
-        observation_space (gym.Space|tuple)     Shape of the featurized observations
-        action_space (gym.space|tuple)          Shape of the action space (len(Action.All_ACTIONS),)
+        observation_space (gymnasium.Space|tuple)     Shape of the featurized observations
+        action_space (gymnasium.Space|tuple)          Shape of the action space (len(Action.ALL_ACTIONS),)
         config (dict)                           Dictionary of relavant bc params
             - model_dir (str)                   Path to pickled keras.Model used to map observations to action logits
             - stochastic (bool)                 Whether action should return logit argmax or sample over distribution
@@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config):
         self.context = self._create_execution_context()
 
     def _setup_shapes(self):
-        # This is here to make the class compatible with both tuples or gym.Space objs for the spaces
+        # This is here to make the class compatible with both tuples and gymnasium.Space objects for the spaces
         # Note: action_space = (len(Action.ALL_ACTIONS,)) is technically NOT the action space shape, which would be () since actions are scalars
         self.observation_shape = (
             self.observation_space

diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py
@@ -6,7 +6,7 @@
 from datetime import datetime
 
 import dill
-import gym
+import gymnasium
 import numpy as np
 import ray
 from ray.rllib.agents.ppo import PPOTrainer
@@ -32,8 +32,8 @@
     OvercookedGridworld,
 )
 
-action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
 timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
 
 
@@ -218,9 +218,9 @@ def _validate_schedule(self, schedule):
     def _setup_action_space(self, agents):
         action_sp = {}
         for agent in agents:
-            action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-        self.action_space = gym.spaces.Dict(action_sp)
-        self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+            action_sp[agent] = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gymnasium.spaces.Dict(action_sp)
+        self.shared_action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
 
     def _setup_observation_space(self, agents):
         dummy_state = self.base_env.mdp.get_standard_start_state()
@@ -232,7 +232,7 @@ def _setup_observation_space(self, agents):
 
         high = np.ones(obs_shape) * float("inf")
         low = np.ones(obs_shape) * 0
-        self.ppo_observation_space = gym.spaces.Box(
+        self.ppo_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
 
@@ -243,7 +243,7 @@ def _setup_observation_space(self, agents):
         obs_shape = featurize_fn_bc(dummy_state)[0].shape
         high = np.ones(obs_shape) * 100
         low = np.ones(obs_shape) * -100
-        self.bc_observation_space = gym.spaces.Box(
+        self.bc_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
         # hardcode mapping between action space and agent
@@ -253,7 +253,7 @@ def _setup_observation_space(self, agents):
                 ob_space[agent] = self.ppo_observation_space
             else:
                 ob_space[agent] = self.bc_observation_space
-        self.observation_space = gym.spaces.Dict(ob_space)
+        self.observation_space = gymnasium.spaces.Dict(ob_space)
 
     def _get_featurize_fn(self, agent_id):
         if agent_id.startswith("ppo"):

diff --git a/src/overcooked_ai_py/__init__.py b/src/overcooked_ai_py/__init__.py
@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 register(
     id="Overcooked-v0",

diff --git a/src/overcooked_ai_py/mdp/overcooked_env.py b/src/overcooked_ai_py/mdp/overcooked_env.py
@@ -2,7 +2,6 @@
 import time
 
 import cv2
-import gym
 import gymnasium
 import numpy as np
 import pygame
@@ -715,8 +714,8 @@ def observation_space(self, agent):
         dummy_mdp = self.base_env.mdp
         dummy_state = dummy_mdp.get_standard_start_state()
         obs_shape = agent.featurize(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
+        high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+        low = np.zeros(obs_shape, dtype=np.float32)
         return gymnasium.spaces.Box(low, high, dtype=np.float32)
 
     # we want to return the same space object every time
@@ -780,7 +779,7 @@ def render(self, mode="human", close=False):
         pass
 
 
-class Overcooked(gym.Env):
+class Overcooked(gymnasium.Env):
     """
     Wrapper for the Env class above that is SOMEWHAT compatible with the standard gym API.
     Why only somewhat? Because we need to flatten a multi-agent env to be a single-agent env (as gym requires).
@@ -814,7 +813,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
 
         mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
         base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
-        env = gym.make("Overcooked-v0",base_env = base_env, featurize_fn =base_env.featurize_state_mdp)
+        env = gymnasium.make("Overcooked-v0",base_env = base_env, featurize_fn =base_env.featurize_state_mdp)
         """
         if baselines_reproducible:
             # NOTE:
@@ -830,17 +829,17 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
         self.base_env = base_env
         self.featurize_fn = featurize_fn
         self.observation_space = self._setup_observation_space()
-        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
         self.reset()
         self.visualizer = StateVisualizer()
 
     def _setup_observation_space(self):
         dummy_mdp = self.base_env.mdp
         dummy_state = dummy_mdp.get_standard_start_state()
         obs_shape = self.featurize_fn(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
-        return gym.spaces.Box(low, high, dtype=np.float32)
+        high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+        low = np.zeros(obs_shape, dtype=np.float32)
+        return gymnasium.spaces.Box(low, high, dtype=np.float32)
 
     def step(self, action):
         """

diff --git a/testing/overcooked_test.py b/testing/overcooked_test.py
@@ -6,7 +6,7 @@
 import unittest
 from math import factorial
 
-import gym
+import gymnasium
 import numpy as np
 
 from overcooked_ai_py.agents.agent import (
@@ -1699,13 +1699,13 @@ def setUp(self):
         np.random.seed(0)
 
     def test_creation(self):
-        env = gym.make(
+        env = gymnasium.make(
             "Overcooked-v0",
             base_env=self.env,
             featurize_fn=self.env.featurize_state_mdp,
         )
         # verify that the action_space * obs_space are initialized correctly
-        self.assertEqual(env.action_space, gym.spaces.Discrete(6))
+        self.assertEqual(env.action_space, gymnasium.spaces.Discrete(6))
         self.assertEqual(
             env.observation_space.shape,
             self.base_mdp.get_featurize_state_shape(),