Merge branch 'main' into mujoco-v5
LucasAlegre committed Oct 16, 2024
2 parents cf081b0 + d4d81ca commit d615b48
Showing 31 changed files with 837 additions and 678 deletions.
36 changes: 10 additions & 26 deletions .github/workflows/build-publish.yml
@@ -4,7 +4,7 @@
# - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
#
# derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml
name: build-publish
name: Build artifact for PyPI

on:
push:
@@ -16,35 +16,18 @@ on:

jobs:
build-wheels:
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
python: 38
platform: manylinux_x86_64
- os: ubuntu-latest
python: 39
platform: manylinux_x86_64
- os: ubuntu-latest
python: 310
platform: manylinux_x86_64
- os: ubuntu-latest
python: 311
platform: manylinux_x86_64
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- uses: actions/checkout@v4
- uses: actions/setup-python@v5

- name: Install dependencies
run: python -m pip install --upgrade pip setuptools build
run: pipx install build
- name: Build sdist and wheels
run: python -m build
run: pyproject-build
- name: Store wheels
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
path: dist

@@ -55,10 +38,11 @@ jobs:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- name: Download dists
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: artifact
path: dist

- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
10 changes: 4 additions & 6 deletions .github/workflows/pre-commit.yml
@@ -13,9 +13,7 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- run: python -m pip install pre-commit
- run: python -m pre_commit --version
- run: python -m pre_commit install
- run: python -m pre_commit run --all-files
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- run: pipx install pre-commit
- run: pre-commit run --all-files
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v5.0.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.3.0
hooks:
- id: codespell
args:
- --ignore-words-list=reacher, mor
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.1.1
hooks:
- id: flake8
args:
@@ -34,16 +34,16 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
rev: v3.18.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/python/black
rev: 23.1.0
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
2 changes: 1 addition & 1 deletion README.md
@@ -50,7 +50,7 @@ obs, info = env.reset()
next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs))

# Optionally, you can scalarize the reward function with the LinearReward wrapper
env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
```
For details on multi-objective MDPs (MOMDPs) and other MORL definitions, see [A practical guide to multi-objective reinforcement learning and planning](https://link.springer.com/article/10.1007/s10458-022-09552-y).
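For reference, a minimal sketch of the updated wrapper path shown in this diff. The env id is only an example; `minecart-v0` is assumed here to expose three objectives, matching the three-element weight vector:

```python
import numpy as np

import mo_gymnasium as mo_gym

env = mo_gym.make("minecart-v0")  # example id; any registered MO env works
# LinearReward now lives under mo_gym.wrappers (previously mo_gym.LinearReward)
env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))

obs, info = env.reset()
# The scalarized env returns a single float reward instead of a vector
obs, scalar_reward, terminated, truncated, info = env.step(env.action_space.sample())
```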

14 changes: 7 additions & 7 deletions docs/_scripts/gen_env_docs.py
@@ -41,7 +41,7 @@ def trim(docstring):

pattern = re.compile(r"(?<!^)(?=[A-Z])")

gym.logger.set_level(gym.logger.DISABLED)
# gym.logger.set_level(gym.logger.DISABLED)

all_envs = list(gym.envs.registry.values())
filtered_envs_by_type = {}
@@ -177,12 +177,12 @@ def trim(docstring):
else:
env_table += f"| Observation Space | {env.observation_space} |\n"

if env.reward_space.shape:
env_table += f"| Reward Shape | {env.reward_space.shape} |\n"
if hasattr(env.reward_space, "high"):
env_table += f"| Reward High | {env.reward_space.high} |\n"
if hasattr(env.reward_space, "low"):
env_table += f"| Reward Low | {env.reward_space.low} |\n"
if env.unwrapped.reward_space.shape:
env_table += f"| Reward Shape | {env.unwrapped.reward_space.shape} |\n"
if hasattr(env.unwrapped.reward_space, "high"):
env_table += f"| Reward High | {env.unwrapped.reward_space.high} |\n"
if hasattr(env.unwrapped.reward_space, "low"):
env_table += f"| Reward Low | {env.unwrapped.reward_space.low} |\n"

env_table += f'| Import | `mo_gymnasium.make("{env_spec.id}")` | \n'
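The switch to `env.unwrapped.reward_space` is needed because registered environments come back wrapped (e.g. in `TimeLimit`), and recent Gymnasium wrappers no longer proxy custom attributes such as `reward_space`. A minimal sketch of the distinction (env id assumed):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("minecart-v0")  # returned wrapped: TimeLimit, OrderEnforcing, ...
# Reading env.reward_space through the wrapper stack may raise AttributeError,
# so the doc generator reads the attribute from the base environment instead:
print(env.unwrapped.reward_space.shape)  # e.g. (3,) for a three-objective env
```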

10 changes: 9 additions & 1 deletion docs/environments/classical.md
@@ -9,9 +9,17 @@ Multi-objective versions of classical Gymnasium's environments.
| Env | Obs/Action spaces | Objectives | Description |
|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [`mo-mountaincar-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). |
| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincarcontinuous.gif" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| [`mo-mountaincar-3d-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, move_penalty, speed_objective]` | Classic Mountain Car env, but with the forward and backward penalties merged into a single move penalty, plus a speed objective that gives a positive reward equal to the car's speed at each time step.* |
| [`mo-mountaincar-timemove-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, move_penalty]` | Classic Mountain Car env, but with the extra penalties for moving backwards or forwards merged into a single move penalty. |
| [`mo-mountaincar-timespeed-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, speed_objective]` | Classic Mountain Car env, but with an extra speed objective that gives a positive reward equal to the car's speed at each time step.* |
| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincarcontinuous.gif" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| [`mo-lunar-lander-v2`](https://mo-gymnasium.farama.org/environments/mo-lunar-lander/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-lunar-lander.gif" width="200px"> | Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the `LunarLander-v2` [environment](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Objectives defined similarly as in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). |

*An additional objective was introduced to keep the agent from converging to a local optimum caused by the lack of reward signal for the static action.

**Read more about these environments and the detailed reasoning behind them in [Pranav Gupta's dissertation](https://drive.google.com/file/d/1yT6hlavYZGmoB2phaIBX_5hbibA3Illa/view?usp=sharing).
<!-- Can be removed if required; work is in progress on a research paper, though it may take a while. -->
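As an illustration, a sketch instantiating the three new Mountain Car variants from the table above (the ids are taken from the table; their availability in this release is assumed):

```python
import mo_gymnasium as mo_gym

# Reward dimensions follow the objective lists in the table: 3, 2 and 2.
for env_id in ("mo-mountaincar-3d-v0", "mo-mountaincar-timemove-v0", "mo-mountaincar-timespeed-v0"):
    env = mo_gym.make(env_id)
    print(env_id, env.unwrapped.reward_space.shape)
```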

```{toctree}
:hidden:
:glob:
1 change: 1 addition & 0 deletions docs/index.md
@@ -11,6 +11,7 @@ lastpage:
introduction/install
introduction/api
wrappers/wrappers
wrappers/vector_wrappers
examples/morl_baselines
```

20 changes: 20 additions & 0 deletions docs/wrappers/vector_wrappers.md
@@ -0,0 +1,20 @@
---
title: "Vector Wrappers"
---

# Vector Wrappers

Similar to the normal wrappers, MO-Gymnasium provides a few wrappers that are specifically designed to work with vectorized environments. They are all available directly from the `mo_gymnasium.wrappers.vector` module.


## `MOSyncVectorEnv`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MOSyncVectorEnv
```

## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics
```
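A usage sketch for the two vector wrappers added here (a sketch only: the constructor signatures are assumed to mirror Gymnasium's `SyncVectorEnv` and the scalar `RecordEpisodeStatistics`):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers.vector import MORecordEpisodeStatistics, MOSyncVectorEnv

# Run four copies of a multi-objective env in lockstep; rewards stay vectorial.
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)])
envs = MORecordEpisodeStatistics(envs, gamma=0.99)  # gamma kwarg assumed

obs, infos = envs.reset()
obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
print(rewards.shape)  # (num_envs, reward_dim)
```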
16 changes: 8 additions & 8 deletions docs/wrappers/wrappers.md
@@ -4,36 +4,36 @@ title: "Wrappers"

# Wrappers

A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium` module.
A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium.wrappers` module.


## `LinearReward`


```{eval-rst}
.. autoclass:: mo_gymnasium.LinearReward
.. autoclass:: mo_gymnasium.wrappers.LinearReward
```

## `MONormalizeReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MONormalizeReward
.. autoclass:: mo_gymnasium.wrappers.MONormalizeReward
```

## `MOClipReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOClipReward
.. autoclass:: mo_gymnasium.wrappers.MOClipReward
```

## `MOSyncVectorEnv`
## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOSyncVectorEnv
.. autoclass:: mo_gymnasium.wrappers.MORecordEpisodeStatistics
```

## `MORecordEpisodeStatistics`
## `MOMaxAndSkipObservation`

```{eval-rst}
.. autoclass:: mo_gymnasium.MORecordEpisodeStatistics
.. autoclass:: mo_gymnasium.wrappers.MOMaxAndSkipObservation
```
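For illustration, a sketch chaining the per-objective reward wrappers under their new module path (the `idx` and min/max parameter names are assumptions, not confirmed by this diff):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import MOClipReward, MONormalizeReward

env = mo_gym.make("deep-sea-treasure-v0")  # example id
# These wrappers act on one reward component at a time, selected by idx:
env = MONormalizeReward(env, idx=0)                    # normalize objective 0
env = MOClipReward(env, idx=1, min_r=-1.0, max_r=1.0)  # clip objective 1 (param names assumed)
```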
12 changes: 3 additions & 9 deletions mo_gymnasium/__init__.py
@@ -2,16 +2,10 @@

# Envs
import mo_gymnasium.envs
from mo_gymnasium import wrappers

# Utils
from mo_gymnasium.utils import (
LinearReward,
MOClipReward,
MONormalizeReward,
MORecordEpisodeStatistics,
MOSyncVectorEnv,
make,
)
from mo_gymnasium.utils import make


__version__ = "1.1.0"
__version__ = "1.2.0"
1 change: 0 additions & 1 deletion mo_gymnasium/envs/__init__.py
@@ -10,6 +10,5 @@
import mo_gymnasium.envs.minecart
import mo_gymnasium.envs.mountain_car
import mo_gymnasium.envs.mujoco
import mo_gymnasium.envs.reacher
import mo_gymnasium.envs.resource_gathering
import mo_gymnasium.envs.water_reservoir
12 changes: 6 additions & 6 deletions mo_gymnasium/envs/fishwood/fishwood.py
@@ -42,8 +42,8 @@ class FishWood(gym.Env, EzPickle):
"""

metadata = {"render_modes": ["human"]}
FISH = 0
WOOD = 1
FISH = np.array([0], dtype=np.int32)
WOOD = np.array([1], dtype=np.int32)
MAX_TS = 200

def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0.9):
@@ -55,17 +55,17 @@ def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0

self.action_space = spaces.Discrete(2) # 2 actions, go fish and go wood
# 2 states, fishing and in the woods
self.observation_space = spaces.Discrete(2)
self.observation_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.int32)
# 2 objectives, amount of fish and amount of wood
self.reward_space = spaces.Box(low=np.array([0, 0]), high=np.array([1.0, 1.0]), dtype=np.float32)
self.reward_dim = 2

self._state = self.WOOD
self._state = self.WOOD.copy()

def reset(self, seed=None, **kwargs):
super().reset(seed=seed)

self._state = self.WOOD
self._state = self.WOOD.copy()
self._timestep = 0
if self.render_mode == "human":
self.render()
@@ -89,7 +89,7 @@ def step(self, action):
rewards[self.FISH] = 1.0

# Execute the action
self._state = action
self._state = np.array([action], dtype=np.int32)
self._timestep += 1

if self.render_mode == "human":
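With the change above, observations become length-1 `int32` arrays (matching the new `Box` space) rather than plain ints. A sketch of the visible effect (env id assumed):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("fishwood-v0")  # id assumed
obs, info = env.reset(seed=0)
print(obs, obs.dtype)  # e.g. [1] int32 (an array now, not a Python int)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print(reward)  # 2-dim vector reward: [fish, wood]
```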
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/lunar_lander/__init__.py
@@ -2,13 +2,13 @@


register(
id="mo-lunar-lander-v2",
id="mo-lunar-lander-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
)

register(
id="mo-lunar-lander-continuous-v2",
id="mo-lunar-lander-continuous-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
kwargs={"continuous": True},
1 change: 1 addition & 0 deletions mo_gymnasium/envs/mario/joypad_space.py
@@ -1,4 +1,5 @@
"""An environment wrapper to convert binary to discrete action space. This is a modified version of the original code from nes-py."""

from typing import List

import gymnasium as gym
8 changes: 5 additions & 3 deletions mo_gymnasium/envs/mario/mario.py
@@ -7,7 +7,6 @@
from gymnasium.utils import EzPickle, seeding

# from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
from gymnasium.wrappers import GrayScaleObservation, ResizeObservation
from nes_py.nes_env import SCREEN_SHAPE_24_BIT

import mo_gymnasium as mo_gym
@@ -16,7 +15,7 @@
from mo_gymnasium.envs.mario.joypad_space import JoypadSpace


class MOSuperMarioBros(SuperMarioBrosEnv, EzPickle):
class MOSuperMarioBros(SuperMarioBrosEnv, gym.Env, EzPickle):
"""
## Description
Multi-objective version of the SuperMarioBros environment.
@@ -202,11 +201,14 @@ def step(self, action):


if __name__ == "__main__":
from gymnasium.wrappers import ResizeObservation
from gymnasium.wrappers.transform_observation import GrayscaleObservation

env = MOSuperMarioBros()
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# env = MaxAndSkipEnv(env, 4)
env = ResizeObservation(env, (84, 84))
env = GrayScaleObservation(env)
env = GrayscaleObservation(env)
# env = FrameStack(env, 4)
env = mo_gym.LinearReward(env)

8 changes: 5 additions & 3 deletions mo_gymnasium/envs/minecart/minecart.py
@@ -249,9 +249,11 @@ def pareto_front(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]
queue = [
{
"speed": ACCELERATION * self.frame_skip,
"dist": mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION
if self.incremental_frame_skip
else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip,
"dist": (
mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION
if self.incremental_frame_skip
else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip
),
"seq": [ACT_ACCEL],
}
]
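The reformatted conditional computes the remaining distance to the mine after the (possibly frame-skipped) acceleration; a worked check with placeholder values:

```python
# Placeholder values for illustration; the real constants live in minecart.py.
ACCELERATION, frame_skip, mine_distance = 0.0075, 4, 0.5

# Incremental frame skip: speed rises each sub-step, so the cart covers
# a*(1 + 2 + ... + k) = a*k*(k+1)/2 over the k skipped frames.
dist_incremental = mine_distance - frame_skip * (frame_skip + 1) / 2 * ACCELERATION  # 0.425

# Non-incremental: full speed a*k is applied for all k sub-steps, covering a*k*k.
dist_non_incremental = mine_distance - ACCELERATION * frame_skip * frame_skip  # 0.38
```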
(Diffs for the remaining 15 of the 31 changed files were not loaded.)