Merge branch 'main' into mujoco-v5
LucasAlegre committed Oct 16, 2024
2 parents cf081b0 + d4d81ca commit d615b48
Showing 31 changed files with 837 additions and 678 deletions.
36 changes: 10 additions & 26 deletions .github/workflows/build-publish.yml
@@ -4,7 +4,7 @@
# - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
#
# derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml
name: build-publish
name: Build artifact for PyPI

on:
push:
@@ -16,35 +16,18 @@ on:

jobs:
build-wheels:
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
python: 38
platform: manylinux_x86_64
- os: ubuntu-latest
python: 39
platform: manylinux_x86_64
- os: ubuntu-latest
python: 310
platform: manylinux_x86_64
- os: ubuntu-latest
python: 311
platform: manylinux_x86_64
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- uses: actions/checkout@v4
- uses: actions/setup-python@v5

- name: Install dependencies
run: python -m pip install --upgrade pip setuptools build
run: pipx install build
- name: Build sdist and wheels
run: python -m build
run: pyproject-build
- name: Store wheels
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
path: dist

@@ -55,10 +38,11 @@ jobs:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- name: Download dists
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: artifact
path: dist

- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
10 changes: 4 additions & 6 deletions .github/workflows/pre-commit.yml
@@ -13,9 +13,7 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- run: python -m pip install pre-commit
- run: python -m pre_commit --version
- run: python -m pre_commit install
- run: python -m pre_commit run --all-files
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- run: pipx install pre-commit
- run: pre-commit run --all-files
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v5.0.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.3.0
hooks:
- id: codespell
args:
- --ignore-words-list=reacher, mor
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.1.1
hooks:
- id: flake8
args:
@@ -34,16 +34,16 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
rev: v3.18.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/python/black
rev: 23.1.0
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
2 changes: 1 addition & 1 deletion README.md
@@ -50,7 +50,7 @@ obs, info = env.reset()
next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs))

# Optionally, you can scalarize the reward function with the LinearReward wrapper
env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))
```
For details on multi-objective MDPs (MOMDPs) and other MORL definitions, see [A practical guide to multi-objective reinforcement learning and planning](https://link.springer.com/article/10.1007/s10458-022-09552-y).
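For reference, a minimal sketch of the updated wrapper path shown in this diff. The env id is only an example; `minecart-v0` is assumed here to expose three objectives, matching the three-element weight vector:

```python
import numpy as np

import mo_gymnasium as mo_gym

env = mo_gym.make("minecart-v0")  # example id; any registered MO env works
# LinearReward now lives under mo_gym.wrappers (previously mo_gym.LinearReward)
env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))

obs, info = env.reset()
# The scalarized env returns a single float reward instead of a vector
obs, scalar_reward, terminated, truncated, info = env.step(env.action_space.sample())
```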

14 changes: 7 additions & 7 deletions docs/_scripts/gen_env_docs.py
@@ -41,7 +41,7 @@ def trim(docstring):

pattern = re.compile(r"(?<!^)(?=[A-Z])")

gym.logger.set_level(gym.logger.DISABLED)
# gym.logger.set_level(gym.logger.DISABLED)

all_envs = list(gym.envs.registry.values())
filtered_envs_by_type = {}
@@ -177,12 +177,12 @@ def trim(docstring):
else:
env_table += f"| Observation Space | {env.observation_space} |\n"

if env.reward_space.shape:
env_table += f"| Reward Shape | {env.reward_space.shape} |\n"
if hasattr(env.reward_space, "high"):
env_table += f"| Reward High | {env.reward_space.high} |\n"
if hasattr(env.reward_space, "low"):
env_table += f"| Reward Low | {env.reward_space.low} |\n"
if env.unwrapped.reward_space.shape:
env_table += f"| Reward Shape | {env.unwrapped.reward_space.shape} |\n"
if hasattr(env.unwrapped.reward_space, "high"):
env_table += f"| Reward High | {env.unwrapped.reward_space.high} |\n"
if hasattr(env.unwrapped.reward_space, "low"):
env_table += f"| Reward Low | {env.unwrapped.reward_space.low} |\n"

env_table += f'| Import | `mo_gymnasium.make("{env_spec.id}")` | \n'
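The switch to `env.unwrapped.reward_space` is needed because registered environments come back wrapped (e.g. in `TimeLimit`), and recent Gymnasium wrappers no longer proxy custom attributes such as `reward_space`. A minimal sketch of the distinction (env id assumed):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("minecart-v0")  # returned wrapped: TimeLimit, OrderEnforcing, ...
# Reading env.reward_space through the wrapper stack may raise AttributeError,
# so the doc generator reads the attribute from the base environment instead:
print(env.unwrapped.reward_space.shape)  # e.g. (3,) for a three-objective env
```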

10 changes: 9 additions & 1 deletion docs/environments/classical.md
@@ -9,9 +9,17 @@ Multi-objective versions of classical Gymnasium's environments.
| Env | Obs/Action spaces | Objectives | Description |
|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [`mo-mountaincar-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). |
| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincarcontinuous.gif" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| [`mo-mountaincar-3d-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, move_penalty, speed_objective]` | Classic Mountain Car env, but with the forward and backward penalties merged into a single move penalty, plus a speed objective that gives a positive reward equal to the car's speed at each time step.* |
| [`mo-mountaincar-timemove-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, move_penalty]` | Classic Mountain Car env, but with the extra penalties for moving backwards or forwards merged into a single move penalty. |
| [`mo-mountaincar-timespeed-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) ** <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincar.gif" width="200px"> | Continuous / Discrete | `[time_penalty, speed_objective]` | Classic Mountain Car env, but with an extra speed objective that gives a positive reward equal to the car's speed at each time step.* |
| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-mountaincarcontinuous.gif" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| [`mo-lunar-lander-v2`](https://mo-gymnasium.farama.org/environments/mo-lunar-lander/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-lunar-lander.gif" width="200px"> | Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the `LunarLander-v2` [environment](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Objectives defined similarly as in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). |

*An additional objective was introduced to keep the agent from converging to a local optimum caused by the lack of reward signal for the static action.

**Read more about these environments and the detailed reasoning behind them in [Pranav Gupta's dissertation](https://drive.google.com/file/d/1yT6hlavYZGmoB2phaIBX_5hbibA3Illa/view?usp=sharing).
<!-- Can be removed if required; work is in progress on a research paper, though it may take a while. -->
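As an illustration, a sketch instantiating the three new Mountain Car variants from the table above (the ids are taken from the table; their availability in this release is assumed):

```python
import mo_gymnasium as mo_gym

# Reward dimensions follow the objective lists in the table: 3, 2 and 2.
for env_id in ("mo-mountaincar-3d-v0", "mo-mountaincar-timemove-v0", "mo-mountaincar-timespeed-v0"):
    env = mo_gym.make(env_id)
    print(env_id, env.unwrapped.reward_space.shape)
```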

```{toctree}
:hidden:
:glob:
1 change: 1 addition & 0 deletions docs/index.md
@@ -11,6 +11,7 @@ lastpage:
introduction/install
introduction/api
wrappers/wrappers
wrappers/vector_wrappers
examples/morl_baselines
```

20 changes: 20 additions & 0 deletions docs/wrappers/vector_wrappers.md
@@ -0,0 +1,20 @@
---
title: "Vector Wrappers"
---

# Vector Wrappers

Similar to the normal wrappers, MO-Gymnasium provides a few wrappers that are specifically designed to work with vectorized environments. They are all available directly from the `mo_gymnasium.wrappers.vector` module.


## `MOSyncVectorEnv`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MOSyncVectorEnv
```

## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics
```
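A usage sketch for the two vector wrappers added here (a sketch only: the constructor signatures are assumed to mirror Gymnasium's `SyncVectorEnv` and the scalar `RecordEpisodeStatistics`):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers.vector import MORecordEpisodeStatistics, MOSyncVectorEnv

# Run four copies of a multi-objective env in lockstep; rewards stay vectorial.
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)])
envs = MORecordEpisodeStatistics(envs, gamma=0.99)  # gamma kwarg assumed

obs, infos = envs.reset()
obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
print(rewards.shape)  # (num_envs, reward_dim)
```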
16 changes: 8 additions & 8 deletions docs/wrappers/wrappers.md
@@ -4,36 +4,36 @@ title: "Wrappers"

# Wrappers

A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium` module.
A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium.wrappers` module.


## `LinearReward`


```{eval-rst}
.. autoclass:: mo_gymnasium.LinearReward
.. autoclass:: mo_gymnasium.wrappers.LinearReward
```

## `MONormalizeReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MONormalizeReward
.. autoclass:: mo_gymnasium.wrappers.MONormalizeReward
```

## `MOClipReward`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOClipReward
.. autoclass:: mo_gymnasium.wrappers.MOClipReward
```

## `MOSyncVectorEnv`
## `MORecordEpisodeStatistics`

```{eval-rst}
.. autoclass:: mo_gymnasium.MOSyncVectorEnv
.. autoclass:: mo_gymnasium.wrappers.MORecordEpisodeStatistics
```

## `MORecordEpisodeStatistics`
## `MOMaxAndSkipObservation`

```{eval-rst}
.. autoclass:: mo_gymnasium.MORecordEpisodeStatistics
.. autoclass:: mo_gymnasium.wrappers.MOMaxAndSkipObservation
```
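For illustration, a sketch chaining the per-objective reward wrappers under their new module path (the `idx` and min/max parameter names are assumptions, not confirmed by this diff):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import MOClipReward, MONormalizeReward

env = mo_gym.make("deep-sea-treasure-v0")  # example id
# These wrappers act on one reward component at a time, selected by idx:
env = MONormalizeReward(env, idx=0)                    # normalize objective 0
env = MOClipReward(env, idx=1, min_r=-1.0, max_r=1.0)  # clip objective 1 (param names assumed)
```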
12 changes: 3 additions & 9 deletions mo_gymnasium/__init__.py
@@ -2,16 +2,10 @@

# Envs
import mo_gymnasium.envs
from mo_gymnasium import wrappers

# Utils
from mo_gymnasium.utils import (
LinearReward,
MOClipReward,
MONormalizeReward,
MORecordEpisodeStatistics,
MOSyncVectorEnv,
make,
)
from mo_gymnasium.utils import make


__version__ = "1.1.0"
__version__ = "1.2.0"
1 change: 0 additions & 1 deletion mo_gymnasium/envs/__init__.py
@@ -10,6 +10,5 @@
import mo_gymnasium.envs.minecart
import mo_gymnasium.envs.mountain_car
import mo_gymnasium.envs.mujoco
import mo_gymnasium.envs.reacher
import mo_gymnasium.envs.resource_gathering
import mo_gymnasium.envs.water_reservoir
12 changes: 6 additions & 6 deletions mo_gymnasium/envs/fishwood/fishwood.py
@@ -42,8 +42,8 @@ class FishWood(gym.Env, EzPickle):
"""

metadata = {"render_modes": ["human"]}
FISH = 0
WOOD = 1
FISH = np.array([0], dtype=np.int32)
WOOD = np.array([1], dtype=np.int32)
MAX_TS = 200

def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0.9):
@@ -55,17 +55,17 @@ def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0

self.action_space = spaces.Discrete(2) # 2 actions, go fish and go wood
# 2 states, fishing and in the woods
self.observation_space = spaces.Discrete(2)
self.observation_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.int32)
# 2 objectives, amount of fish and amount of wood
self.reward_space = spaces.Box(low=np.array([0, 0]), high=np.array([1.0, 1.0]), dtype=np.float32)
self.reward_dim = 2

self._state = self.WOOD
self._state = self.WOOD.copy()

def reset(self, seed=None, **kwargs):
super().reset(seed=seed)

self._state = self.WOOD
self._state = self.WOOD.copy()
self._timestep = 0
if self.render_mode == "human":
self.render()
@@ -89,7 +89,7 @@ def step(self, action):
rewards[self.FISH] = 1.0

# Execute the action
self._state = action
self._state = np.array([action], dtype=np.int32)
self._timestep += 1

if self.render_mode == "human":
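With the change above, observations become length-1 `int32` arrays (matching the new `Box` space) rather than plain ints. A sketch of the visible effect (env id assumed):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("fishwood-v0")  # id assumed
obs, info = env.reset(seed=0)
print(obs, obs.dtype)  # e.g. [1] int32 (an array now, not a Python int)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print(reward)  # 2-dim vector reward: [fish, wood]
```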
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/lunar_lander/__init__.py
@@ -2,13 +2,13 @@


register(
id="mo-lunar-lander-v2",
id="mo-lunar-lander-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
)

register(
id="mo-lunar-lander-continuous-v2",
id="mo-lunar-lander-continuous-v3",
entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander",
max_episode_steps=1000,
kwargs={"continuous": True},
1 change: 1 addition & 0 deletions mo_gymnasium/envs/mario/joypad_space.py
@@ -1,4 +1,5 @@
"""An environment wrapper to convert binary to discrete action space. This is a modified version of the original code from nes-py."""

from typing import List

import gymnasium as gym
8 changes: 5 additions & 3 deletions mo_gymnasium/envs/mario/mario.py
@@ -7,7 +7,6 @@
from gymnasium.utils import EzPickle, seeding

# from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
from gymnasium.wrappers import GrayScaleObservation, ResizeObservation
from nes_py.nes_env import SCREEN_SHAPE_24_BIT

import mo_gymnasium as mo_gym
@@ -16,7 +15,7 @@
from mo_gymnasium.envs.mario.joypad_space import JoypadSpace


class MOSuperMarioBros(SuperMarioBrosEnv, EzPickle):
class MOSuperMarioBros(SuperMarioBrosEnv, gym.Env, EzPickle):
"""
## Description
Multi-objective version of the SuperMarioBros environment.
@@ -202,11 +201,14 @@ def step(self, action):


if __name__ == "__main__":
from gymnasium.wrappers import ResizeObservation
from gymnasium.wrappers.transform_observation import GrayscaleObservation

env = MOSuperMarioBros()
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# env = MaxAndSkipEnv(env, 4)
env = ResizeObservation(env, (84, 84))
env = GrayScaleObservation(env)
env = GrayscaleObservation(env)
# env = FrameStack(env, 4)
env = mo_gym.LinearReward(env)

8 changes: 5 additions & 3 deletions mo_gymnasium/envs/minecart/minecart.py
@@ -249,9 +249,11 @@ def pareto_front(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]
queue = [
{
"speed": ACCELERATION * self.frame_skip,
"dist": mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION
if self.incremental_frame_skip
else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip,
"dist": (
mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION
if self.incremental_frame_skip
else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip
),
"seq": [ACT_ACCEL],
}
]
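The reformatted conditional computes the remaining distance to the mine after the (possibly frame-skipped) acceleration; a worked check with placeholder values:

```python
# Placeholder values for illustration; the real constants live in minecart.py.
ACCELERATION, frame_skip, mine_distance = 0.0075, 4, 0.5

# Incremental frame skip: speed rises each sub-step, so the cart covers
# a*(1 + 2 + ... + k) = a*k*(k+1)/2 over the k skipped frames.
dist_incremental = mine_distance - frame_skip * (frame_skip + 1) / 2 * ACCELERATION  # 0.425

# Non-incremental: full speed a*k is applied for all k sub-steps, covering a*k*k.
dist_non_incremental = mine_distance - ACCELERATION * frame_skip * frame_skip  # 0.38
```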
(Diffs for the remaining 15 of the 31 changed files were not loaded.)