Commit 7931b98
Merge branch 'mujoco-v5' of https://github.com/Farama-Foundation/MO-Gymnasium into mujoco-v5
LucasAlegre committed Oct 16, 2024
2 parents d615b48 + eab4592 commit 7931b98
Showing 6 changed files with 312 additions and 0 deletions.
38 changes: 38 additions & 0 deletions mo_gymnasium/envs/mujoco/__init__.py
@@ -45,6 +45,12 @@
max_episode_steps=1000,
)

register(
id="mo-walker2d-v5",
entry_point="mo_gymnasium.envs.mujoco.walker2d_v5:MOWalker2dEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
@@ -58,20 +64,52 @@
kwargs={"cost_objective": False},
)


register(
id="mo-ant-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-2d-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-swimmer-v4",
entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-swimmer-v5",
entry_point="mo_gymnasium.envs.mujoco.swimmer_v5:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v4",
entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v5",
entry_point="mo_gymnasium.envs.mujoco.humanoid_v5:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher_v4:MOReacherEnv",
max_episode_steps=50,
)

register(
id="mo-reacher-v5",
entry_point="mo_gymnasium.envs.mujoco.reacher_v5:MOReacherEnv",
max_episode_steps=50,
)
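
The registrations above make the new v5 variants available through MO-Gymnasium's standard factory. A minimal usage sketch (assuming `mo_gymnasium` is installed, imported as `mo_gym`, and that `reward_space` is reached through `env.unwrapped`):

```python
import mo_gymnasium as mo_gym

# Each id registered above resolves to one of the multi-objective MuJoCo v5 classes added in this commit.
for env_id in ["mo-walker2d-v5", "mo-ant-v5", "mo-ant-2d-v5", "mo-swimmer-v5", "mo-humanoid-v5", "mo-reacher-v5"]:
    env = mo_gym.make(env_id)
    obs, info = env.reset(seed=42)
    print(env_id, env.unwrapped.reward_space.shape)  # rewards are vector-valued
    env.close()
```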
57 changes: 57 additions & 0 deletions mo_gymnasium/envs/mujoco/ant_v5.py
@@ -0,0 +1,57 @@
import numpy as np
from gymnasium.envs.mujoco.ant_v5 import AntEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOAntEnv(AntEnv, EzPickle):
"""
## Description
Multi-objective version of the AntEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.
Gymnasium's original 'Ant-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-ant-v5', cost_objective=False)
LinearReward(env, weight=np.array([1.0, 0.0]))
## Reward Space
The reward is 2- or 3-dimensional:
- 0: x-velocity
- 1: y-velocity
- 2: Control cost of the action
If the cost_objective flag is set to False, the reward is 2-dimensional and the control and contact costs are added to the other objectives.
A healthy reward is added to all objectives.
## Version History
- v5: Now includes contact forces in the reward and observation.
See https://gymnasium.farama.org/environments/mujoco/ant/#version-history
"""

def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self.cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
y_velocity = info["y_velocity"]
cost = info["reward_ctrl"]
contact_cost = info["reward_contact"]
healthy_reward = info["reward_survive"]

if self.cost_objective:
cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv
contact_cost /= self._contact_cost_weight
vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
vec_reward += cost + contact_cost

vec_reward += healthy_reward

return observation, vec_reward, terminated, truncated, info
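
A short sketch of the `cost_objective` flag and of the scalarization mentioned in the docstring (the `LinearReward` import path is an assumption; it may live at the package root in older MO-Gymnasium releases):

```python
import numpy as np
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import LinearReward  # assumed import path; may differ by version

# Default: 3 objectives (x-velocity, y-velocity, control/contact cost).
env3 = mo_gym.make("mo-ant-v5")
print(env3.unwrapped.reward_space.shape)  # (3,)

# cost_objective=False: 2 objectives, with the costs folded into both velocity terms.
env2 = mo_gym.make("mo-ant-2d-v5")
print(env2.unwrapped.reward_space.shape)  # (2,)

# Per the docstring, weighting the 2-objective variant with [1.0, 0.0]
# recovers a scalar reward equivalent to Gymnasium's Ant-v5.
scalar_env = LinearReward(env2, weight=np.array([1.0, 0.0]))
```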
37 changes: 37 additions & 0 deletions mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -0,0 +1,37 @@
import numpy as np
from gymnasium.envs.mujoco.humanoid_v5 import HumanoidEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHumanoidEnv(HumanoidEnv, EzPickle):
"""
## Description
Multi-objective version of the HumanoidEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.
## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward (x-velocity)
- 1: Control cost of the action
## Version History:
- v5: Now includes contact forces. See: https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
negative_cost = 10 * info["reward_ctrl"] + info["reward_contact"]
vec_reward = np.array([velocity, negative_cost], dtype=np.float32)

vec_reward += self.healthy_reward # All objectives are penalized when the agent falls

return observation, vec_reward, terminated, truncated, info
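
The second objective aggregates the control and contact terms reported by the base Humanoid env (the factor of 10 rescales the base env's `reward_ctrl` term; the rescaling choice is not explained in the commit). A quick inspection sketch, under the same assumptions as above:

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("mo-humanoid-v5")
obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())

# vec_reward[0]: forward progress (x-velocity); vec_reward[1]: negative control + contact cost.
# Both components also carry the healthy/survive bonus added in step() above.
print(vec_reward, info["x_velocity"], info["reward_ctrl"], info["reward_contact"])
```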
101 changes: 101 additions & 0 deletions mo_gymnasium/envs/mujoco/reacher_v5.py
@@ -0,0 +1,101 @@
from os import path

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.envs.mujoco.reacher_v5 import ReacherEnv
from gymnasium.spaces import Box, Discrete


DEFAULT_CAMERA_CONFIG = {"trackbodyid": 0}


class MOReacherEnv(ReacherEnv):
"""
## Description
Multi-objective version of the [`Reacher-v5` environment](https://gymnasium.farama.org/environments/mujoco/reacher/).
## Observation Space
The observation is 6-dimensional and contains:
- sin and cos of the angles of the central and elbow joints
- angular velocity of the central and elbow joints
## Action Space
The action space is discrete and contains the 3^2=9 possible actions based on applying positive (+1), negative (-1) or zero (0) torque to each of the two joints.
## Reward Space
The reward is 4-dimensional and is defined based on the distance between the tip of the arm and each of the four target locations.
For each i in {1, 2, 3, 4} it is computed as:
```math
r_i = 1 - 4 \cdot \| \text{finger\_tip\_coord} - \text{target}_i \|
```
## Version History:
See https://gymnasium.farama.org/environments/mujoco/reacher/#version-history
"""

def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
self.observation_space = Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float64)
MujocoEnv.__init__(
self,
path.join(path.dirname(__file__), "assets", "mo_reacher.xml"),
2,
observation_space=self.observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
actions = [-1.0, 0.0, 1.0]
self.action_dict = dict()
for a1 in actions:
for a2 in actions:
self.action_dict[len(self.action_dict)] = (a1, a2)
self.action_space = Discrete(9)
# Target goals: x1, y1, x2, y2, ... x4, y4
self.goal = np.array([0.14, 0.0, -0.14, 0.0, 0.0, 0.14, 0.0, -0.14])
self.reward_space = Box(low=-1.0, high=1.0, shape=(4,))
self.reward_dim = 4

def step(self, a):
real_action = self.action_dict[int(a)]
vec_reward = np.array(
[
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target1")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target2")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target3")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target4")[:2]),
],
dtype=np.float32,
)

self._step_mujoco_simulation(real_action, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (
ob,
vec_reward,
False,
False,
{},
)

def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos[:2] = np.array([0, 3.1415 / 2]) # init position
qpos[-len(self.goal) :] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv)
qvel[-len(self.goal) :] = 0
self.set_state(qpos, qvel)
return self._get_obs()

def _get_obs(self):
theta = self.data.qpos.flatten()[:2]
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.data.qvel.flatten()[:2] * 0.1,
]
)
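
The discrete action space enumerates all torque pairs built in `__init__` above; a sketch of the mapping and of the 4-dimensional reward (same assumptions as the earlier sketches):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("mo-reacher-v5")

# Discrete index -> (central joint torque, elbow joint torque), in the order built in __init__:
# {0: (-1, -1), 1: (-1, 0), 2: (-1, 1), 3: (0, -1), 4: (0, 0), 5: (0, 1), 6: (1, -1), 7: (1, 0), 8: (1, 1)}
print(env.unwrapped.action_dict)

obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
print(vec_reward.shape)  # (4,) -- one component per target
```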
41 changes: 41 additions & 0 deletions mo_gymnasium/envs/mujoco/swimmer_v5.py
@@ -0,0 +1,41 @@
import numpy as np
from gymnasium.envs.mujoco.swimmer_v5 import SwimmerEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOSwimmerEnv(SwimmerEnv, EzPickle):
"""
## Description
Multi-objective version of the SwimmerEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information.
Gymnasium's original 'Swimmer-v5' is recovered by the following linear scalarization:
env = mo_gym.make('mo-swimmer-v5')
LinearReward(env, weight=np.array([1.0, 1e-4]))
## Reward Space
The reward is 2-dimensional:
- 0: Reward for moving forward (x-velocity)
- 1: Control cost of the action
## Version History:
See https://gymnasium.farama.org/main/environments/mujoco/swimmer/#version-history
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
energy = -np.sum(np.square(action))

vec_reward = np.array([velocity, energy], dtype=np.float32)

return observation, vec_reward, terminated, truncated, info
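
As with the Ant variant, the scalarization in the docstring can be applied directly (again assuming `LinearReward` is importable from `mo_gymnasium.wrappers`); 1e-4 matches Swimmer's default `ctrl_cost_weight`:

```python
import numpy as np
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import LinearReward  # assumed import path; may differ by version

env = mo_gym.make("mo-swimmer-v5")
# Weighting the control-cost objective by Swimmer's default ctrl_cost_weight recovers the scalar reward.
scalar_env = LinearReward(env, weight=np.array([1.0, 1e-4]))
```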
38 changes: 38 additions & 0 deletions mo_gymnasium/envs/mujoco/walker2d_v5.py
@@ -0,0 +1,38 @@
import numpy as np
from gymnasium.envs.mujoco.walker2d_v5 import Walker2dEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOWalker2dEnv(Walker2dEnv, EzPickle):
"""
## Description
Multi-objective version of the Walker2dEnv environment.
See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information.
## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward (x-velocity)
- 1: Control cost of the action
## Version History:
- See https://gymnasium.farama.org/main/environments/mujoco/walker2d/#version-history
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
energy = -np.sum(np.square(action))

vec_reward = np.array([velocity, energy], dtype=np.float32)

vec_reward += self.healthy_reward # All objectives are penalized when the agent falls

return observation, vec_reward, terminated, truncated, info
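
A minimal random-policy rollout accumulating the 2-dimensional return, to illustrate how the vector reward is consumed (same assumptions as the earlier sketches):

```python
import numpy as np
import mo_gymnasium as mo_gym

env = mo_gym.make("mo-walker2d-v5")
obs, info = env.reset(seed=1)
vec_return = np.zeros(env.unwrapped.reward_dim)
done = False
while not done:
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    vec_return += vec_reward
    done = terminated or truncated
print(vec_return)  # [forward-velocity objective, control-cost objective], each including the healthy bonus
env.close()
```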
