Commit de6e419
Switch to gymnasium in favor of openai gym.
Gymnasium is a drop-in replacement for OpenAI's gym. Some gymnasium modules were already in use in this repo, since it depends on PettingZoo.
itwasabhi committed Dec 16, 2024
1 parent cd6739e commit de6e419
Showing 6 changed files with 25 additions and 26 deletions.
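As context for the drop-in claim: Gymnasium keeps the same core API surface, so existing call sites can even keep the old module name via an import alias. A minimal sketch, not part of this commit (which instead writes "gymnasium" out in full at every call site):

import gymnasium as gym  # hypothetical alias pattern; this commit renames the import explicitly instead

space = gym.spaces.Discrete(6)  # same constructor used throughout the diffs below
print(space.sample())           # draws a random action index in [0, 6)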
setup.py (1 addition, 1 deletion)

@@ -42,7 +42,7 @@
         "numpy",
         "scipy",
         "tqdm",
-        "gym",
+        "gymnasium",
         "pettingzoo",
         "ipython",
         "pygame",
src/human_aware_rl/imitation/behavior_cloning_tf2.py (3 additions, 3 deletions)

@@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config):
         """
         RLLib compatible constructor for initializing a behavior cloning model
-        observation_space (gym.Space|tuple)        Shape of the featurized observations
-        action_space (gym.space|tuple)             Shape of the action space (len(Action.All_ACTIONS),)
+        observation_space (gymnasium.Space|tuple)  Shape of the featurized observations
+        action_space (gymnasium.space|tuple)       Shape of the action space (len(Action.All_ACTIONS),)
         config (dict)                              Dictionary of relavant bc params
             - model_dir (str)                      Path to pickled keras.Model used to map observations to action logits
             - stochastic (bool)                    Whether action should return logit argmax or sample over distribution
@@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config):
         self.context = self._create_execution_context()

     def _setup_shapes(self):
-        # This is here to make the class compatible with both tuples or gym.Space objs for the spaces
+        # This is here to make the class compatible with both tuples or gymnasium.Space objs for the spaces
         # Note: action_space = (len(Action.ALL_ACTIONS,)) is technically NOT the action space shape, which would be () since actions are scalars
         self.observation_shape = (
             self.observation_space
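The tuple-or-Space handling that _setup_shapes describes can be summarized in a small sketch; shape_of is a hypothetical helper for illustration, not code from this repo:

import gymnasium

def shape_of(space_or_tuple):
    # Accept either a gymnasium.Space (use its .shape) or a plain tuple,
    # mirroring the constructor docstring's gymnasium.Space|tuple contract.
    if isinstance(space_or_tuple, gymnasium.spaces.Space):
        return space_or_tuple.shape
    return tuple(space_or_tuple)

print(shape_of(gymnasium.spaces.Box(0.0, 1.0, shape=(96,))))  # -> (96,)
print(shape_of((6,)))                                         # -> (6,)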
src/human_aware_rl/rllib/rllib.py (9 additions, 9 deletions)

@@ -6,7 +6,7 @@
 from datetime import datetime

 import dill
-import gym
+import gymnasium
 import numpy as np
 import ray
 from ray.rllib.agents.ppo import PPOTrainer
@@ -32,8 +32,8 @@
     OvercookedGridworld,
 )

-action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
 timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
@@ -218,9 +218,9 @@ def _validate_schedule(self, schedule):
     def _setup_action_space(self, agents):
         action_sp = {}
         for agent in agents:
-            action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-        self.action_space = gym.spaces.Dict(action_sp)
-        self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+            action_sp[agent] = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gymnasium.spaces.Dict(action_sp)
+        self.shared_action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))

     def _setup_observation_space(self, agents):
         dummy_state = self.base_env.mdp.get_standard_start_state()
@@ -232,7 +232,7 @@ def _setup_observation_space(self, agents):
         high = np.ones(obs_shape) * float("inf")
         low = np.ones(obs_shape) * 0
-        self.ppo_observation_space = gym.spaces.Box(
+        self.ppo_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
@@ -243,7 +243,7 @@ def _setup_observation_space(self, agents):
         obs_shape = featurize_fn_bc(dummy_state)[0].shape
         high = np.ones(obs_shape) * 100
         low = np.ones(obs_shape) * -100
-        self.bc_observation_space = gym.spaces.Box(
+        self.bc_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
         # hardcode mapping between action space and agent
@@ -253,7 +253,7 @@ def _setup_observation_space(self, agents):
                 ob_space[agent] = self.ppo_observation_space
             else:
                 ob_space[agent] = self.bc_observation_space
-        self.observation_space = gym.spaces.Dict(ob_space)
+        self.observation_space = gymnasium.spaces.Dict(ob_space)

     def _get_featurize_fn(self, agent_id):
         if agent_id.startswith("ppo"):
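A sketch of the per-agent structure these setters build; the 6 actions, 96-dim observation shape, and agent names are illustrative placeholders, not values taken from the repo:

import numpy as np
import gymnasium

n_actions = 6      # placeholder for len(Action.ALL_ACTIONS)
obs_shape = (96,)  # placeholder for the featurized observation shape

# One Discrete action space per agent, collected into a Dict space.
action_space = gymnasium.spaces.Dict(
    {agent: gymnasium.spaces.Discrete(n_actions) for agent in ("ppo_0", "bc_1")}
)

# PPO features are unbounded above, hence the inf highs; building low/high
# as float32 up front matches the Box dtype and avoids precision warnings.
ppo_observation_space = gymnasium.spaces.Box(
    np.zeros(obs_shape, dtype=np.float32),
    np.full(obs_shape, np.inf, dtype=np.float32),
    dtype=np.float32,
)
assert action_space["ppo_0"].n == n_actions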
src/overcooked_ai_py/__init__.py (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register

 register(
     id="Overcooked-v0",
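With Overcooked-v0 registered, environments are built through gymnasium.make. A usage sketch following the docstring that appears later in this diff (the import paths below are assumed from the file layout shown in this commit):

import gymnasium
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv

mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
# Custom kwargs are forwarded by the registry to Overcooked.__init__.
env = gymnasium.make(
    "Overcooked-v0",
    base_env=base_env,
    featurize_fn=base_env.featurize_state_mdp,
)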
src/overcooked_ai_py/mdp/overcooked_env.py (8 additions, 9 deletions)

@@ -2,7 +2,6 @@
 import time

 import cv2
-import gym
 import gymnasium
 import numpy as np
 import pygame
@@ -715,8 +714,8 @@ def observation_space(self, agent):
         dummy_mdp = self.base_env.mdp
         dummy_state = dummy_mdp.get_standard_start_state()
         obs_shape = agent.featurize(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
+        high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+        low = np.zeros(obs_shape, dtype=np.float32)
         return gymnasium.spaces.Box(low, high, dtype=np.float32)

     # we want to return the same space object every time
@@ -780,7 +779,7 @@ def render(self, mode="human", close=False):
         pass


-class Overcooked(gym.Env):
+class Overcooked(gymnasium.Env):
     """
     Wrapper for the Env class above that is SOMEWHAT compatible with the standard gym API.
     Why only somewhat? Because we need to flatten a multi-agent env to be a single-agent env (as gym requires).
@@ -814,7 +813,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
         mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
         base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
-        env = gym.make("Overcooked-v0", base_env=base_env, featurize_fn=base_env.featurize_state_mdp)
+        env = gymnasium.make("Overcooked-v0", base_env=base_env, featurize_fn=base_env.featurize_state_mdp)
         """
         if baselines_reproducible:
             # NOTE:
@@ -830,17 +829,17 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
         self.base_env = base_env
         self.featurize_fn = featurize_fn
         self.observation_space = self._setup_observation_space()
-        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
         self.reset()
         self.visualizer = StateVisualizer()

     def _setup_observation_space(self):
         dummy_mdp = self.base_env.mdp
         dummy_state = dummy_mdp.get_standard_start_state()
         obs_shape = self.featurize_fn(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
-        return gym.spaces.Box(low, high, dtype=np.float32)
+        high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+        low = np.zeros(obs_shape, dtype=np.float32)
+        return gymnasium.spaces.Box(low, high, dtype=np.float32)

     def step(self, action):
         """
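Subclassing gymnasium.Env rather than gym.Env also points the wrapper at the newer step/reset contract: reset() returns (obs, info) and step() returns a 5-tuple with separate terminated/truncated flags. A generic rollout sketch for an env that follows that contract, assuming nothing beyond the standard API:

import gymnasium

def run_one_episode(env: gymnasium.Env) -> float:
    # Gymnasium's contract: reset() -> (obs, info); step() ->
    # (obs, reward, terminated, truncated, info) with separate end flags.
    obs, info = env.reset()
    total_reward, done = 0.0, False
    while not done:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        done = terminated or truncated
    return total_reward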
testing/overcooked_test.py (3 additions, 3 deletions)

@@ -6,7 +6,7 @@
 import unittest
 from math import factorial

-import gym
+import gymnasium
 import numpy as np

 from overcooked_ai_py.agents.agent import (
@@ -1699,13 +1699,13 @@ def setUp(self):
         np.random.seed(0)

     def test_creation(self):
-        env = gym.make(
+        env = gymnasium.make(
             "Overcooked-v0",
             base_env=self.env,
             featurize_fn=self.env.featurize_state_mdp,
         )
         # verify that the action_space * obs_space are initialized correctly
-        self.assertEqual(env.action_space, gym.spaces.Discrete(6))
+        self.assertEqual(env.action_space, gymnasium.spaces.Discrete(6))
         self.assertEqual(
             env.observation_space.shape,
             self.base_mdp.get_featurize_state_shape(),
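The assertEqual on the action space works because gymnasium spaces compare structurally, not by identity. A standalone illustration, not taken from the test suite:

import numpy as np
import gymnasium

# Two spaces with identical parameters compare equal, which is what
# assertEqual(env.action_space, gymnasium.spaces.Discrete(6)) relies on.
assert gymnasium.spaces.Discrete(6) == gymnasium.spaces.Discrete(6)
assert gymnasium.spaces.Box(0.0, 1.0, shape=(3,), dtype=np.float32) == gymnasium.spaces.Box(0.0, 1.0, shape=(3,), dtype=np.float32)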
