diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index d44dc96d..a00c7e8e 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -4,7 +4,7 @@ # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ # # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml -name: build-publish +name: Build artifact for PyPI on: push: @@ -16,35 +16,18 @@ on: jobs: build-wheels: - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-latest - python: 38 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 39 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 310 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 311 - platform: manylinux_x86_64 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install dependencies - run: python -m pip install --upgrade pip setuptools build + run: pipx install build - name: Build sdist and wheels - run: python -m build + run: pyproject-build - name: Store wheels - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: path: dist @@ -55,10 +38,11 @@ jobs: if: github.event_name == 'release' && github.event.action == 'published' steps: - name: Download dists - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: artifact path: dist + - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 80ce02af..9f2cc2ab 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,9 +13,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - - run: python -m pip install pre-commit - - run: python -m pre_commit --version - - run: python -m pre_commit install - - run: python -m pre_commit run --all-files + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - run: pipx install pre-commit + - run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cbbea960..05e72fd0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-symlinks - id: destroyed-symlinks @@ -17,13 +17,13 @@ repos: - id: detect-private-key - id: debug-statements - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.3.0 hooks: - id: codespell args: - --ignore-words-list=reacher, mor - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 args: @@ -34,16 +34,16 @@ repos: - --show-source - --statistics - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.18.0 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/python/black - rev: 23.1.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/pydocstyle diff --git a/README.md b/README.md index fb5f7885..708bb3a6 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 
@@ obs, info = env.reset() next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs)) # Optionally, you can scalarize the reward function with the LinearReward wrapper -env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2])) +env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2])) ``` For details on multi-objective MDP's (MOMDP's) and other MORL definitions, see [A practical guide to multi-objective reinforcement learning and planning](https://link.springer.com/article/10.1007/s10458-022-09552-y). diff --git a/docs/_scripts/gen_env_docs.py b/docs/_scripts/gen_env_docs.py index eec55219..87a2184e 100644 --- a/docs/_scripts/gen_env_docs.py +++ b/docs/_scripts/gen_env_docs.py @@ -41,7 +41,7 @@ def trim(docstring): pattern = re.compile(r"(? | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). | -| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/)
| Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. | +| [`mo-mountaincar-3d-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) **
| Continuous / Discrete | `[time_penalty, move_penalty, speed_objective]` | The forward and backward penalties are merged into a single move penalty, and a speed objective is added that gives a positive reward equal to the car's speed at that time step.* | +| [`mo-mountaincar-timemove-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) **
| Continuous / Discrete | `[time_penalty, move_penalty]` | Classic Mountain Car env, but with the backward and forward penalties merged into a single move penalty. | +| [`mo-mountaincar-timespeed-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincar/) **
| Continuous / Discrete | `[time_penalty, speed_objective]` | Classic Mountain Car env, but with an extra speed objective that gives a positive reward equal to the car's speed at that time step.* | +| [`mo-mountaincarcontinuous-v0`](https://mo-gymnasium.farama.org/environments/mo-mountaincarcontinuous/)
| Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. | | [`mo-lunar-lander-v2`](https://mo-gymnasium.farama.org/environments/mo-lunar-lander/)
| Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the `LunarLander-v2` [environment](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Objectives defined similarly as in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). | +*An additional objective was introduced to prevent the agent from converging to the local maxima due to a lack of reward signal for the static action. + +**Read more about these environments and the detailed reasoning behind them in [`Pranav Gupta's Dissertation`](https://drive.google.com/file/d/1yT6hlavYZGmoB2phaIBX_5hbibA3Illa/view?usp=sharing) + + ```{toctree} :hidden: :glob: diff --git a/docs/index.md b/docs/index.md index fb6d56ff..f1d24905 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,7 @@ lastpage: introduction/install introduction/api wrappers/wrappers +wrappers/vector_wrappers examples/morl_baselines ``` diff --git a/docs/wrappers/vector_wrappers.md b/docs/wrappers/vector_wrappers.md new file mode 100644 index 00000000..ade24022 --- /dev/null +++ b/docs/wrappers/vector_wrappers.md @@ -0,0 +1,20 @@ +--- +title: "Vector Wrappers" +--- + +# Vector Wrappers + +Similar to the normal wrappers, MO-Gymnasium provides a few wrappers that are specifically designed to work with vectorized environments. They are all available directly from the `mo_gymnasium.wrappers.vector` module. + + +## `MOSyncVectorEnv` + +```{eval-rst} +.. autoclass:: mo_gymnasium.wrappers.vector.MOSyncVectorEnv +``` + +## `MORecordEpisodeStatistics` + +```{eval-rst} +.. autoclass:: mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics +``` diff --git a/docs/wrappers/wrappers.md b/docs/wrappers/wrappers.md index 542e5cca..acf2ab56 100644 --- a/docs/wrappers/wrappers.md +++ b/docs/wrappers/wrappers.md @@ -4,36 +4,36 @@ title: "Wrappers" # Wrappers -A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium` module. +A few wrappers inspired from Gymnasium's wrappers are available in MO-Gymnasium. They are all available directly from the `mo_gymnasium.wrappers` module. ## `LinearReward` ```{eval-rst} -.. autoclass:: mo_gymnasium.LinearReward +.. autoclass:: mo_gymnasium.wrappers.LinearReward ``` ## `MONormalizeReward` ```{eval-rst} -.. autoclass:: mo_gymnasium.MONormalizeReward +.. autoclass:: mo_gymnasium.wrappers.MONormalizeReward ``` ## `MOClipReward` ```{eval-rst} -.. autoclass:: mo_gymnasium.MOClipReward +.. autoclass:: mo_gymnasium.wrappers.MOClipReward ``` -## `MOSyncVectorEnv` +## `MORecordEpisodeStatistics` ```{eval-rst} -.. autoclass:: mo_gymnasium.MOSyncVectorEnv +.. autoclass:: mo_gymnasium.wrappers.MORecordEpisodeStatistics ``` -## `MORecordEpisodeStatistics` +## `MOMaxAndSkipObservation` ```{eval-rst} -.. autoclass:: mo_gymnasium.MORecordEpisodeStatistics +.. 
autoclass:: mo_gymnasium.wrappers.MOMaxAndSkipObservation ``` diff --git a/mo_gymnasium/__init__.py b/mo_gymnasium/__init__.py index 23201d0c..94fe1c92 100644 --- a/mo_gymnasium/__init__.py +++ b/mo_gymnasium/__init__.py @@ -2,16 +2,10 @@ # Envs import mo_gymnasium.envs +from mo_gymnasium import wrappers # Utils -from mo_gymnasium.utils import ( - LinearReward, - MOClipReward, - MONormalizeReward, - MORecordEpisodeStatistics, - MOSyncVectorEnv, - make, -) +from mo_gymnasium.utils import make -__version__ = "1.1.0" +__version__ = "1.2.0" diff --git a/mo_gymnasium/envs/__init__.py b/mo_gymnasium/envs/__init__.py index 7e917397..c4846df6 100644 --- a/mo_gymnasium/envs/__init__.py +++ b/mo_gymnasium/envs/__init__.py @@ -10,6 +10,5 @@ import mo_gymnasium.envs.minecart import mo_gymnasium.envs.mountain_car import mo_gymnasium.envs.mujoco -import mo_gymnasium.envs.reacher import mo_gymnasium.envs.resource_gathering import mo_gymnasium.envs.water_reservoir diff --git a/mo_gymnasium/envs/fishwood/fishwood.py b/mo_gymnasium/envs/fishwood/fishwood.py index fad02d77..7aa1242b 100644 --- a/mo_gymnasium/envs/fishwood/fishwood.py +++ b/mo_gymnasium/envs/fishwood/fishwood.py @@ -42,8 +42,8 @@ class FishWood(gym.Env, EzPickle): """ metadata = {"render_modes": ["human"]} - FISH = 0 - WOOD = 1 + FISH = np.array([0], dtype=np.int32) + WOOD = np.array([1], dtype=np.int32) MAX_TS = 200 def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0.9): @@ -55,17 +55,17 @@ def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0 self.action_space = spaces.Discrete(2) # 2 actions, go fish and go wood # 2 states, fishing and in the woods - self.observation_space = spaces.Discrete(2) + self.observation_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.int32) # 2 objectives, amount of fish and amount of wood self.reward_space = spaces.Box(low=np.array([0, 0]), high=np.array([1.0, 1.0]), dtype=np.float32) self.reward_dim = 2 - self._state = self.WOOD + self._state = self.WOOD.copy() def reset(self, seed=None, **kwargs): super().reset(seed=seed) - self._state = self.WOOD + self._state = self.WOOD.copy() self._timestep = 0 if self.render_mode == "human": self.render() @@ -89,7 +89,7 @@ def step(self, action): rewards[self.FISH] = 1.0 # Execute the action - self._state = action + self._state = np.array([action], dtype=np.int32) self._timestep += 1 if self.render_mode == "human": diff --git a/mo_gymnasium/envs/lunar_lander/__init__.py b/mo_gymnasium/envs/lunar_lander/__init__.py index d4435341..817671fb 100644 --- a/mo_gymnasium/envs/lunar_lander/__init__.py +++ b/mo_gymnasium/envs/lunar_lander/__init__.py @@ -2,13 +2,13 @@ register( - id="mo-lunar-lander-v2", + id="mo-lunar-lander-v3", entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander", max_episode_steps=1000, ) register( - id="mo-lunar-lander-continuous-v2", + id="mo-lunar-lander-continuous-v3", entry_point="mo_gymnasium.envs.lunar_lander.lunar_lander:MOLunarLander", max_episode_steps=1000, kwargs={"continuous": True}, diff --git a/mo_gymnasium/envs/mario/joypad_space.py b/mo_gymnasium/envs/mario/joypad_space.py index 73969eee..32fb0dfb 100644 --- a/mo_gymnasium/envs/mario/joypad_space.py +++ b/mo_gymnasium/envs/mario/joypad_space.py @@ -1,4 +1,5 @@ """An environment wrapper to convert binary to discrete action space. 
This is a modified version of the original code from nes-py.""" + from typing import List import gymnasium as gym diff --git a/mo_gymnasium/envs/mario/mario.py b/mo_gymnasium/envs/mario/mario.py index b7279941..45924ed3 100644 --- a/mo_gymnasium/envs/mario/mario.py +++ b/mo_gymnasium/envs/mario/mario.py @@ -7,7 +7,6 @@ from gymnasium.utils import EzPickle, seeding # from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv -from gymnasium.wrappers import GrayScaleObservation, ResizeObservation from nes_py.nes_env import SCREEN_SHAPE_24_BIT import mo_gymnasium as mo_gym @@ -16,7 +15,7 @@ from mo_gymnasium.envs.mario.joypad_space import JoypadSpace -class MOSuperMarioBros(SuperMarioBrosEnv, EzPickle): +class MOSuperMarioBros(SuperMarioBrosEnv, gym.Env, EzPickle): """ ## Description Multi-objective version of the SuperMarioBro environment. @@ -202,11 +201,14 @@ def step(self, action): if __name__ == "__main__": + from gymnasium.wrappers import ResizeObservation + from gymnasium.wrappers.transform_observation import GrayscaleObservation + env = MOSuperMarioBros() env = JoypadSpace(env, SIMPLE_MOVEMENT) # env = MaxAndSkipEnv(env, 4) env = ResizeObservation(env, (84, 84)) - env = GrayScaleObservation(env) + env = GrayscaleObservation(env) # env = FrameStack(env, 4) env = mo_gym.LinearReward(env) diff --git a/mo_gymnasium/envs/minecart/minecart.py b/mo_gymnasium/envs/minecart/minecart.py index b5154192..cd6c64e7 100644 --- a/mo_gymnasium/envs/minecart/minecart.py +++ b/mo_gymnasium/envs/minecart/minecart.py @@ -249,9 +249,11 @@ def pareto_front(self, gamma: float, symmetric: bool = True) -> List[np.ndarray] queue = [ { "speed": ACCELERATION * self.frame_skip, - "dist": mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION - if self.incremental_frame_skip - else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip, + "dist": ( + mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION + if self.incremental_frame_skip + else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip + ), "seq": [ACT_ACCEL], } ] diff --git a/mo_gymnasium/envs/mountain_car/__init__.py b/mo_gymnasium/envs/mountain_car/__init__.py index f75fe751..523de281 100644 --- a/mo_gymnasium/envs/mountain_car/__init__.py +++ b/mo_gymnasium/envs/mountain_car/__init__.py @@ -6,3 +6,24 @@ entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar", max_episode_steps=200, ) + +register( + id="mo-mountaincar-3d-v0", + entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar", + max_episode_steps=200, + kwargs={"add_speed_objective": True, "merge_move_penalty": True}, +) + +register( + id="mo-mountaincar-timemove-v0", + entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar", + max_episode_steps=200, + kwargs={"merge_move_penalty": True}, +) + +register( + id="mo-mountaincar-timespeed-v0", + entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar", + max_episode_steps=200, + kwargs={"remove_move_penalty": True, "add_speed_objective": True}, +) diff --git a/mo_gymnasium/envs/mountain_car/mountain_car.py b/mo_gymnasium/envs/mountain_car/mountain_car.py index 6e88acca..f49cf5ce 100644 --- a/mo_gymnasium/envs/mountain_car/mountain_car.py +++ b/mo_gymnasium/envs/mountain_car/mountain_car.py @@ -14,19 +14,50 @@ class MOMountainCar(MountainCarEnv, EzPickle): See [Gymnasium's env](https://gymnasium.farama.org/environments/classic_control/mountain_car_continuous/) for more information. 
## Reward space: - The reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward. + By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward. - time penalty: -1.0 for each time step - reverse penalty: -1.0 for each time step the action is 0 (reverse) - forward penalty: -1.0 for each time step the action is 2 (forward) + + #Alternatively, the reward can be changed with the following options: + - add_speed_objective: Add an extra objective corresponding to the speed of the car. + - remove_move_penalty: Remove the reverse and forward objectives. + - merge_move_penalty: Merge reverse and forward penalties into a single penalty. """ - def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + def __init__( + self, + render_mode: Optional[str] = None, + add_speed_objective: bool = False, + remove_move_penalty: bool = False, + merge_move_penalty: bool = False, + goal_velocity=0, + ): super().__init__(render_mode, goal_velocity) - EzPickle.__init__(self, render_mode, goal_velocity) + EzPickle.__init__(self, render_mode, add_speed_objective, remove_move_penalty, merge_move_penalty, goal_velocity) + self.add_speed_objective = add_speed_objective + self.remove_move_penalty = remove_move_penalty + self.merge_move_penalty = merge_move_penalty - self.reward_space = spaces.Box(low=np.array([-1, -1, -1]), high=np.array([-1, 0, 0]), shape=(3,), dtype=np.float32) self.reward_dim = 3 + if self.add_speed_objective: + self.reward_dim += 1 + + if self.remove_move_penalty: + self.reward_dim -= 2 + elif self.merge_move_penalty: + self.reward_dim -= 1 + + low = np.array([-1] * self.reward_dim) + high = np.zeros(self.reward_dim) + high[0] = -1 # Time penalty is always -1 + if self.add_speed_objective: + low[-1] = 0.0 + high[-1] = 1.1 + + self.reward_space = spaces.Box(low=low, high=high, shape=(self.reward_dim,), dtype=np.float32) + def step(self, action: int): assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" @@ -39,11 +70,20 @@ def step(self, action: int): velocity = 0 terminated = bool(position >= self.goal_position and velocity >= self.goal_velocity) - # reward = -1.0 - reward = np.zeros(3, dtype=np.float32) + + reward = np.zeros(self.reward_dim, dtype=np.float32) + reward[0] = 0.0 if terminated else -1.0 # time penalty - reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty - reward[2] = 0.0 if action != 2 else -1.0 # forward penalty + + if not self.remove_move_penalty: + if self.merge_move_penalty: + reward[1] = 0.0 if action == 1 else -1.0 + else: + reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty + reward[2] = 0.0 if action != 2 else -1.0 # forward penalty + + if self.add_speed_objective: + reward[-1] = 15 * abs(velocity) self.state = (position, velocity) if self.render_mode == "human": diff --git a/mo_gymnasium/envs/mujoco/reacher_v4.py b/mo_gymnasium/envs/mujoco/reacher_v4.py index 01a5bc9d..9596f64c 100644 --- a/mo_gymnasium/envs/mujoco/reacher_v4.py +++ b/mo_gymnasium/envs/mujoco/reacher_v4.py @@ -13,7 +13,7 @@ class MOReacherEnv(ReacherEnv): """ ## Description - Mujoco version of `mo-reacher-v0`, based on [`Reacher-v4` environment](https://gymnasium.farama.org/environments/mujoco/reacher/). + Multi-objective version of the [`Reacher-v4` environment](https://gymnasium.farama.org/environments/mujoco/reacher/). 
## Observation Space The observation is 6-dimensional and contains: diff --git a/mo_gymnasium/envs/reacher/__init__.py b/mo_gymnasium/envs/reacher/__init__.py deleted file mode 100644 index b752382c..00000000 --- a/mo_gymnasium/envs/reacher/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from gymnasium.envs.registration import register - - -register( - id="mo-reacher-v0", - entry_point="mo_gymnasium.envs.reacher.reacher:ReacherBulletEnv", - max_episode_steps=100, - kwargs={"fixed_initial_state": None}, -) diff --git a/mo_gymnasium/envs/reacher/reacher.py b/mo_gymnasium/envs/reacher/reacher.py deleted file mode 100644 index f881f512..00000000 --- a/mo_gymnasium/envs/reacher/reacher.py +++ /dev/null @@ -1,158 +0,0 @@ -from typing import Optional - -import numpy as np -from gymnasium import spaces -from gymnasium.utils import EzPickle, seeding -from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv -from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot -from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene - - -target_positions = list(map(lambda l: np.array(l), [(0.14, 0.0), (-0.14, 0.0), (0.0, 0.14), (0.0, -0.14)])) - - -class ReacherBulletEnv(BaseBulletEnv, EzPickle): - metadata = {"render_modes": ["human", "rgb_array"]} - - def __init__( - self, - render_mode: Optional[str] = None, - target=(0.14, 0.0), - fixed_initial_state: Optional[tuple] = (3.14, 0), - ): - EzPickle.__init__(self, render_mode, target, fixed_initial_state) - self.robot = ReacherRobot(target, fixed_initial_state=fixed_initial_state) - self.render_mode = render_mode - BaseBulletEnv.__init__(self, self.robot, render=render_mode == "human") - self._cam_dist = 0.75 - - # self.target_positions = list(map(lambda l: np.array(l), [(0.14, 0.0), (-0.14, 0.0), (0.0, 0.14), (0.0, -0.14), (0.22, 0.0), (-0.22, 0.0), (0.0, 0.22), (0.0, -0.22), (0.1, 0.1), (0.1, -0.1), (-0.1, 0.1), (-0.1, -0.1)])) - # self.target_positions = list(map(lambda l: np.array(l), [(0.14, 0.0), (-0.14, 0.0), (0.0, 0.14), (0.0, -0.14), (0.1, 0.1), (0.1, -0.1), (-0.1, 0.1), (-0.1, -0.1)])) - self.target_positions = list( - map( - lambda l: np.array(l), - [(0.14, 0.0), (-0.14, 0.0), (0.0, 0.14), (0.0, -0.14)], - ) - ) - - actions = [-1.0, 0.0, 1.0] - self.action_dict = dict() - for a1 in actions: - for a2 in actions: - self.action_dict[len(self.action_dict)] = (a1, a2) - - self.action_space = spaces.Discrete(9) - self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32) - self.reward_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32) - self.reward_dim = 4 - - def create_single_player_scene(self, bullet_client): - return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1) - - def step(self, a): - real_action = self.action_dict[int(a)] - - assert not self.scene.multiplayer - self.robot.apply_action(real_action) - self.scene.global_step() - - state = self.robot.calc_state() # sets self.to_target_vec - - """ delta = np.linalg.norm(np.array(self.robot.fingertip.pose().xyz()) - np.array(self.robot.target.pose().xyz())) - reward = 1. - 4. 
* delta """ - - phi = np.zeros(len(self.target_positions), dtype=np.float32) - for index, target in enumerate(self.target_positions): - delta = np.linalg.norm(np.array(self.robot.fingertip.pose().xyz()[:2]) - target) - phi[index] = 1.0 - 4 * delta # 1 - 4 - - self.HUD(state, real_action, False) - - if self.render_mode == "human": - self._render(mode="human") - - return state, phi, False, False, {} - - def render(self): - if self.render_mode == "human": - self._render(mode="human") - else: - return self._render(mode="rgb_array") - - def camera_adjust(self): - x, y, z = self.robot.fingertip.pose().xyz() - x *= 0.5 - y *= 0.5 - self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) - - def reset(self, seed=None, **kwargs): - self._seed(seed) - if seed is not None: - self._np_random, seed = seeding.np_random(seed) - obs = super().reset() - if self.render_mode == "human": - self._render(mode="human") - return obs, {} - - -class ReacherRobot(MJCFBasedRobot): - TARG_LIMIT = 0.27 - - def __init__(self, target, fixed_initial_state=False): - MJCFBasedRobot.__init__(self, "reacher.xml", "body0", action_dim=2, obs_dim=4) - self.target_pos = target - self.fixed_initial_state = fixed_initial_state - - def robot_specific_reset(self, bullet_client): - self.jdict["target_x"].reset_current_position(target_positions[0][0], 0) - self.jdict["target_y"].reset_current_position(target_positions[0][1], 0) - - """ self.jdict["target2_x"].reset_current_position(target_positions[1][0], 0) - self.jdict["target2_y"].reset_current_position(target_positions[1][1], 0) - self.jdict["target3_x"].reset_current_position(target_positions[2][0], 0) - self.jdict["target3_y"].reset_current_position(target_positions[2][1], 0) - self.jdict["target4_x"].reset_current_position(target_positions[3][0], 0) - self.jdict["target4_y"].reset_current_position(target_positions[3][1], 0) """ - - self.fingertip = self.parts["fingertip"] - self.target = self.parts["target"] - self.central_joint = self.jdict["joint0"] - self.elbow_joint = self.jdict["joint1"] - if self.fixed_initial_state is None: - self.central_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) - self.elbow_joint.reset_current_position(self.np_random.uniform(low=-3.14 / 2, high=3.14 / 2), 0) - else: - self.central_joint.reset_current_position(0, 0) - self.elbow_joint.reset_current_position(self.fixed_initial_state[0], self.fixed_initial_state[1]) - - def apply_action(self, a): - assert np.isfinite(a).all() - self.central_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1))) - self.elbow_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1))) - - def calc_state(self): - theta, self.theta_dot = self.central_joint.current_relative_position() - self.gamma, self.gamma_dot = self.elbow_joint.current_relative_position() - # target_x, _ = self.jdict["target_x"].current_position() - # target_y, _ = self.jdict["target_y"].current_position() - self.to_target_vec = np.array(self.fingertip.pose().xyz()) - np.array(self.target.pose().xyz()) - return np.array( - [ - np.cos(theta), - np.sin(theta), - self.theta_dot * 0.1, - self.gamma, - self.gamma_dot * 0.1, - ], - dtype=np.float32, - ) - - -if __name__ == "__main__": - env = ReacherBulletEnv() - # env.render(mode='human') - obs = env.reset() - print(env.observation_space.contains(obs), obs.dtype, env.observation_space) - while True: - env.step(env.action_space.sample()) - # env.render(mode='human') diff --git a/mo_gymnasium/py.typed b/mo_gymnasium/py.typed new file mode 100644 index 00000000..e69de29b diff 
--git a/mo_gymnasium/utils.py b/mo_gymnasium/utils.py index def90471..a5c41a14 100644 --- a/mo_gymnasium/utils.py +++ b/mo_gymnasium/utils.py @@ -1,14 +1,8 @@ -"""Utilities function such as wrappers.""" +"""Utilities functions.""" -import time -from copy import deepcopy -from typing import Iterator, Tuple, TypeVar +from typing import TypeVar import gymnasium as gym -import numpy as np -from gymnasium.vector import SyncVectorEnv -from gymnasium.wrappers.normalize import RunningMeanStd -from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics ObsType = TypeVar("ObsType") @@ -26,337 +20,3 @@ def make(env_name: str, disable_env_checker: bool = True, **kwargs) -> gym.Env: """ """Disable env checker, as it requires the reward to be a scalar.""" return gym.make(env_name, disable_env_checker=disable_env_checker, **kwargs) - - -class LinearReward(gym.Wrapper, gym.utils.RecordConstructorArgs): - """Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector.""" - - def __init__(self, env: gym.Env, weight: np.ndarray = None): - """Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector. - - Args: - env: env to wrap - weight: weight vector to use in the dot product - """ - gym.utils.RecordConstructorArgs.__init__(self, weight=weight) - gym.Wrapper.__init__(self, env) - if weight is None: - weight = np.ones(shape=env.unwrapped.reward_space.shape) - self.set_weight(weight) - - def set_weight(self, weight: np.ndarray): - """Changes weights for the scalarization. - - Args: - weight: new weights to set - Returns: nothing - """ - assert weight.shape == self.env.unwrapped.reward_space.shape, "Reward weight has different shape than reward vector." - self.w = weight - - def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: - """Steps in the environment. - - Args: - action: action to perform - Returns: obs, scalarized_reward, terminated, truncated, info - """ - observation, reward, terminated, truncated, info = self.env.step(action) - scalar_reward = np.dot(reward, self.w) - info["vector_reward"] = reward - info["reward_weights"] = self.w - - return observation, scalar_reward, terminated, truncated, info - - -class MONormalizeReward(gym.Wrapper, gym.utils.RecordConstructorArgs): - """Wrapper to normalize the reward component at index idx. Does not touch other reward components.""" - - def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-8): - """This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. - - Args: - env (env): The environment to apply the wrapper - idx (int): the index of the reward to normalize - epsilon (float): A stability parameter - gamma (float): The discount factor that is used in the exponential moving average. - """ - gym.utils.RecordConstructorArgs.__init__(self, idx=idx, gamma=gamma, epsilon=epsilon) - gym.Wrapper.__init__(self, env) - self.idx = idx - self.num_envs = getattr(env, "num_envs", 1) - self.is_vector_env = getattr(env, "is_vector_env", False) - self.return_rms = RunningMeanStd(shape=()) - self.returns = np.zeros(self.num_envs) - self.gamma = gamma - self.epsilon = epsilon - - def step(self, action: ActType): - """Steps through the environment, normalizing the rewards returned. 
- - Args: - action: action to perform - Returns: obs, normalized_rewards, terminated, truncated, infos - """ - obs, rews, terminated, truncated, infos = self.env.step(action) - # Extracts the objective value to normalize - to_normalize = rews[self.idx] - if not self.is_vector_env: - to_normalize = np.array([to_normalize]) - self.returns = self.returns * self.gamma + to_normalize - # Defer normalization to gym implementation - to_normalize = self.normalize(to_normalize) - self.returns[terminated] = 0.0 - if not self.is_vector_env: - to_normalize = to_normalize[0] - # Injecting the normalized objective value back into the reward vector - rews[self.idx] = to_normalize - return obs, rews, terminated, truncated, infos - - def normalize(self, rews): - """Normalizes the rewards with the running mean rewards and their variance. - - Args: - rews: rewards - Returns: the normalized reward - """ - self.return_rms.update(self.returns) - return rews / np.sqrt(self.return_rms.var + self.epsilon) - - -class MOClipReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs): - """Clip reward[idx] to [min, max].""" - - def __init__(self, env: gym.Env, idx: int, min_r, max_r): - """Clip reward[idx] to [min, max]. - - Args: - env: environment to wrap - idx: index of the MO reward to clip - min_r: min reward - max_r: max reward - """ - gym.utils.RecordConstructorArgs.__init__(self, idx=idx, min_r=min_r, max_r=max_r) - gym.RewardWrapper.__init__(self, env) - self.idx = idx - self.min_r = min_r - self.max_r = max_r - - def reward(self, reward): - """Clips the reward at the given index. - - Args: - reward: reward to clip. - Returns: the clipped reward. - """ - reward[self.idx] = np.clip(reward[self.idx], self.min_r, self.max_r) - return reward - - -class MOSyncVectorEnv(SyncVectorEnv): - """Vectorized environment that serially runs multiple environments.""" - - def __init__( - self, - env_fns: Iterator[callable], - copy: bool = True, - ): - """Vectorized environment that serially runs multiple environments. - - Args: - env_fns: env constructors - copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations. - """ - SyncVectorEnv.__init__(self, env_fns, copy=copy) - # Just overrides the rewards memory to add the number of objectives - self.reward_space = self.envs[0].unwrapped.reward_space - self._rewards = np.zeros( - ( - self.num_envs, - self.reward_space.shape[0], - ), - dtype=np.float64, - ) - - -class MORecordEpisodeStatistics(RecordEpisodeStatistics, gym.utils.RecordConstructorArgs): - """This wrapper will keep track of cumulative rewards and episode lengths. - - After the completion of an episode, ``info`` will look like this:: - - >>> info = { - ... "episode": { - ... "r": "", - ... "dr": "", - ... "l": "", # contrary to Gymnasium, these are not a numpy array - ... "t": "" - ... }, - ... } - - For a vectorized environments the output will be in the form of (be careful to first wrap the env into vector before applying MORewordStatistics):: - - >>> infos = { - ... "final_observation": "", - ... "_final_observation": "", - ... "final_info": "", - ... "_final_info": "", - ... "episode": { - ... "r": "", - ... "dr": "", - ... "l": "", - ... "t": "" - ... }, - ... "_episode": "" - ... } - """ - - def __init__(self, env: gym.Env, gamma: float = 1.0, deque_size: int = 100): - """This wrapper will keep track of cumulative rewards and episode lengths. 
- - Args: - env (Env): The environment to apply the wrapper - gamma (float): Discounting factor - deque_size: The size of the buffers :attr:`return_queue` and :attr:`length_queue` - """ - gym.utils.RecordConstructorArgs.__init__(self, gamma=gamma, deque_size=deque_size) - RecordEpisodeStatistics.__init__(self, env, deque_size=deque_size) - # CHANGE: Here we just override the standard implementation to extend to MO - # We also take care of the case where the env is vectorized - self.reward_dim = self.env.unwrapped.reward_space.shape[0] - if self.is_vector_env: - self.rewards_shape = (self.num_envs, self.reward_dim) - else: - self.rewards_shape = (self.reward_dim,) - self.gamma = gamma - - def reset(self, **kwargs): - """Resets the environment using kwargs and resets the episode returns and lengths.""" - obs, info = super().reset(**kwargs) - - # CHANGE: Here we just override the standard implementation to extend to MO - self.episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) - self.disc_episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) - - return obs, info - - def step(self, action): - """Steps through the environment, recording the episode statistics.""" - # This is very close the code from the RecordEpisodeStatistics wrapper from gym. - ( - observations, - rewards, - terminations, - truncations, - infos, - ) = self.env.step(action) - assert isinstance( - infos, dict - ), f"`info` dtype is {type(infos)} while supported dtype is `dict`. This may be due to usage of other wrappers in the wrong order." - self.episode_returns += rewards - self.episode_lengths += 1 - - # CHANGE: The discounted returns are also computed here - self.disc_episode_returns += rewards * np.repeat(self.gamma**self.episode_lengths, self.reward_dim).reshape( - self.episode_returns.shape - ) - - dones = np.logical_or(terminations, truncations) - num_dones = np.sum(dones) - if num_dones: - if "episode" in infos or "_episode" in infos: - raise ValueError("Attempted to add episode stats when they already exist") - else: - episode_return = np.zeros(self.rewards_shape, dtype=np.float32) - disc_episode_return = np.zeros(self.rewards_shape, dtype=np.float32) - if self.is_vector_env: - for i in range(self.num_envs): - if dones[i]: - # CHANGE: Makes a deepcopy to avoid subsequent mutations - episode_return[i] = deepcopy(self.episode_returns[i]) - disc_episode_return[i] = deepcopy(self.disc_episode_returns[i]) - else: - episode_return = deepcopy(self.episode_returns) - disc_episode_return = deepcopy(self.disc_episode_returns) - - length_eps = np.where(dones, self.episode_lengths, 0) - time_eps = np.where( - dones, - np.round(time.perf_counter() - self.episode_start_times, 6), - 0.0, - ) - - infos["episode"] = { - "r": episode_return, - "dr": disc_episode_return, - "l": length_eps[0] if not self.is_vector_env else length_eps, - "t": time_eps[0] if not self.is_vector_env else time_eps, - } - if self.is_vector_env: - infos["_episode"] = np.where(dones, True, False) - self.return_queue.extend(self.episode_returns[dones]) - self.length_queue.extend(self.episode_lengths[dones]) - self.episode_count += num_dones - self.episode_lengths[dones] = 0 - self.episode_returns[dones] = np.zeros(self.reward_dim, dtype=np.float32) - self.disc_episode_returns[dones] = np.zeros(self.reward_dim, dtype=np.float32) - self.episode_start_times[dones] = time.perf_counter() - return ( - observations, - rewards, - terminations, - truncations, - infos, - ) - - -class MOMaxAndSkipObservation(gym.Wrapper): - """This wrapper will 
return only every ``skip``-th frame (frameskipping) and return the max between the two last observations. - - Note: This wrapper is based on the wrapper from stable-baselines3: https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html#MaxAndSkipEnv - """ - - def __init__(self, env: gym.Env[ObsType, ActType], skip: int = 4): - """This wrapper will return only every ``skip``-th frame (frameskipping) and return the max between the two last frames. - - Args: - env (Env): The environment to apply the wrapper - skip: The number of frames to skip - """ - gym.Wrapper.__init__(self, env) - - if not np.issubdtype(type(skip), np.integer): - raise TypeError(f"The skip is expected to be an integer, actual type: {type(skip)}") - if skip < 2: - raise ValueError(f"The skip value needs to be equal or greater than two, actual value: {skip}") - if env.observation_space.shape is None: - raise ValueError("The observation space must have the shape attribute.") - - self._skip = skip - self._obs_buffer = np.zeros((2, *env.observation_space.shape), dtype=env.observation_space.dtype) - - def step(self, action): - """Step the environment with the given action for ``skip`` steps. - - Repeat action, sum reward, and max over last observations. - - Args: - action: The action to step through the environment with - Returns: - Max of the last two observations, reward, terminated, truncated, and info from the environment - """ - total_reward = np.zeros(self.env.unwrapped.reward_dim, dtype=np.float32) - terminated = truncated = False - info = {} - for i in range(self._skip): - obs, reward, terminated, truncated, info = self.env.step(action) - done = terminated or truncated - if i == self._skip - 2: - self._obs_buffer[0] = obs - if i == self._skip - 1: - self._obs_buffer[1] = obs - total_reward += reward - if done: - break - max_frame = self._obs_buffer.max(axis=0) - - return max_frame, total_reward, terminated, truncated, info diff --git a/mo_gymnasium/wrappers/__init__.py b/mo_gymnasium/wrappers/__init__.py new file mode 100644 index 00000000..274241a0 --- /dev/null +++ b/mo_gymnasium/wrappers/__init__.py @@ -0,0 +1,10 @@ +"""Contains all wrappers (vectors or not).""" + +from mo_gymnasium.wrappers import vector +from mo_gymnasium.wrappers.wrappers import ( + LinearReward, + MOClipReward, + MOMaxAndSkipObservation, + MONormalizeReward, + MORecordEpisodeStatistics, +) diff --git a/mo_gymnasium/wrappers/vector/__init__.py b/mo_gymnasium/wrappers/vector/__init__.py new file mode 100644 index 00000000..60225b17 --- /dev/null +++ b/mo_gymnasium/wrappers/vector/__init__.py @@ -0,0 +1,6 @@ +"""Vector wrappers.""" + +from mo_gymnasium.wrappers.vector.wrappers import ( + MORecordEpisodeStatistics, + MOSyncVectorEnv, +) diff --git a/mo_gymnasium/wrappers/vector/wrappers.py b/mo_gymnasium/wrappers/vector/wrappers.py new file mode 100644 index 00000000..6028061d --- /dev/null +++ b/mo_gymnasium/wrappers/vector/wrappers.py @@ -0,0 +1,227 @@ +"""Vector wrappers.""" + +import time +from copy import deepcopy +from typing import Any, Dict, Iterator, Tuple + +import gymnasium as gym +import numpy as np +from gymnasium.core import ActType, ObsType +from gymnasium.vector import SyncVectorEnv +from gymnasium.vector.utils import concatenate, iterate +from gymnasium.vector.vector_env import ArrayType, VectorEnv +from gymnasium.wrappers.vector import RecordEpisodeStatistics + + +class MOSyncVectorEnv(SyncVectorEnv): + """Vectorized environment that serially runs multiple environments. 
+ + Example: + >>> import mo_gymnasium as mo_gym + + >>> envs = mo_gym.wrappers.vector.MOSyncVectorEnv([ + ... lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4) + ... ]) + >>> envs + MOSyncVectorEnv(num_envs=4) + >>> obs, infos = envs.reset() + >>> obs + array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=int32) + >>> _ = envs.action_space.seed(42) + >>> actions = envs.action_space.sample() + >>> obs, rewards, terminateds, truncateds, infos = envs.step([0, 1, 2, 3]) + >>> obs + array([[0, 0], [1, 0], [0, 0], [0, 3]], dtype=int32) + >>> rewards + array([[0., -1.], [0.7, -1.], [0., -1.], [0., -1.]], dtype=float32) + >>> terminateds + array([False, True, False, False]) + """ + + def __init__( + self, + env_fns: Iterator[callable], + copy: bool = True, + ): + """Vectorized environment that serially runs multiple environments. + + Args: + env_fns: env constructors + copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations. + """ + SyncVectorEnv.__init__(self, env_fns, copy=copy) + # Just overrides the rewards memory to add the number of objectives + self.reward_space = self.envs[0].unwrapped.reward_space + self._rewards = np.zeros( + ( + self.num_envs, + self.reward_space.shape[0], + ), + dtype=np.float32, + ) + + def step(self, actions: ActType) -> Tuple[ObsType, ArrayType, ArrayType, ArrayType, Dict[str, Any]]: + """Steps through each of the environments returning the batched results. + + Returns: + The batched environment step results + """ + actions = iterate(self.action_space, actions) + + observations, infos = [], {} + for i, action in enumerate(actions): + if self._autoreset_envs[i]: + env_obs, env_info = self.envs[i].reset() + + self._rewards[i] = np.zeros(self.reward_space.shape[0]) # This overrides Gymnasium's implem + self._terminations[i] = False + self._truncations[i] = False + else: + ( + env_obs, + self._rewards[i], + self._terminations[i], + self._truncations[i], + env_info, + ) = self.envs[ + i + ].step(action) + + observations.append(env_obs) + infos = self._add_info(infos, env_info, i) + + # Concatenate the observations + self._observations = concatenate(self.single_observation_space, observations, self._observations) + self._autoreset_envs = np.logical_or(self._terminations, self._truncations) + + return ( + deepcopy(self._observations) if self.copy else self._observations, + np.copy(self._rewards), + np.copy(self._terminations), + np.copy(self._truncations), + infos, + ) + + +class MORecordEpisodeStatistics(RecordEpisodeStatistics): + """This wrapper will keep track of cumulative rewards and episode lengths. + + At the end of any episode within the vectorized env, the statistics of the episode + will be added to ``info`` using the key ``episode``, and the ``_episode`` key + is used to indicate the environment index which has a terminated or truncated episode. + + For a vectorized environments the output will be in the form of (be careful to first wrap the env into vector before applying MORewordStatistics):: + + >>> infos = { # doctest: +SKIP + ... "episode": { + ... "r": "", + ... "dr": "", + ... "l": "", + ... "t": "" + ... }, + ... "_episode": "" + ... } + + Moreover, the most recent rewards and episode lengths are stored in buffers that can be accessed via + :attr:`wrapped_env.return_queue` and :attr:`wrapped_env.length_queue` respectively. 
+ + Attributes: + return_queue: The cumulative rewards of the last ``deque_size``-many episodes + length_queue: The lengths of the last ``deque_size``-many episodes + """ + + def __init__( + self, + env: VectorEnv, + gamma: float = 1.0, + buffer_length: int = 100, + stats_key: str = "episode", + ): + """This wrapper will keep track of cumulative rewards and episode lengths. + + Args: + env (Env): The environment to apply the wrapper + gamma: The discount factor + buffer_length: The size of the buffers :attr:`return_queue`, :attr:`length_queue` and :attr:`time_queue` + stats_key: The info key to save the data + """ + gym.utils.RecordConstructorArgs.__init__(self, buffer_length=buffer_length, stats_key=stats_key) + RecordEpisodeStatistics.__init__(self, env, buffer_length=buffer_length, stats_key=stats_key) + self.disc_episode_returns = None + self.reward_dim = self.env.unwrapped.reward_space.shape[0] + self.rewards_shape = (self.num_envs, self.reward_dim) + self.gamma = gamma + + def reset(self, **kwargs): + """Resets the environment using kwargs and resets the episode returns and lengths.""" + obs, info = super().reset(**kwargs) + + # CHANGE: Here we just override the standard implementation to extend to MO + self.episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) + self.disc_episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) + + return obs, info + + def step(self, actions: ActType) -> Tuple[ObsType, ArrayType, ArrayType, ArrayType, Dict[str, Any]]: + """Steps through the environment, recording the episode statistics.""" + ( + observations, + rewards, + terminations, + truncations, + infos, + ) = self.env.step(actions) + + assert isinstance( + infos, dict + ), f"`vector.RecordEpisodeStatistics` requires `info` type to be `dict`, its actual type is {type(infos)}. This may be due to usage of other wrappers in the wrong order." 
+ + self.episode_returns[self.prev_dones] = 0 + self.episode_lengths[self.prev_dones] = 0 + self.episode_start_times[self.prev_dones] = time.perf_counter() + self.episode_returns[~self.prev_dones] += rewards[~self.prev_dones] + + # CHANGE: The discounted returns are also computed here + self.disc_episode_returns += rewards * np.repeat(self.gamma**self.episode_lengths, self.reward_dim).reshape( + self.episode_returns.shape + ) + self.episode_lengths[~self.prev_dones] += 1 + + self.prev_dones = dones = np.logical_or(terminations, truncations) + num_dones = np.sum(dones) + if num_dones: + if self._stats_key in infos or f"_{self._stats_key}" in infos: + raise ValueError(f"Attempted to add episode stats when they already exist, info keys: {list(infos.keys())}") + else: + # CHANGE to handle the vectorial reward and do deepcopies + episode_return = np.zeros(self.rewards_shape, dtype=np.float32) + disc_episode_return = np.zeros(self.rewards_shape, dtype=np.float32) + + for i in range(self.num_envs): + if dones[i]: + episode_return[i] = np.copy(self.episode_returns[i]) + disc_episode_return[i] = np.copy(self.disc_episode_returns[i]) + + episode_time_length = np.round(time.perf_counter() - self.episode_start_times, 6) + infos[self._stats_key] = { + "r": episode_return, + "dr": disc_episode_return, + "l": np.where(dones, self.episode_lengths, 0), + "t": np.where(dones, episode_time_length, 0.0), + } + infos[f"_{self._stats_key}"] = dones + + self.episode_count += num_dones + + for i in np.where(dones): + self.time_queue.extend(episode_time_length[i]) + self.return_queue.extend(self.episode_returns[i]) + self.length_queue.extend(self.episode_lengths[i]) + + return ( + observations, + rewards, + terminations, + truncations, + infos, + ) diff --git a/mo_gymnasium/wrappers/wrappers.py b/mo_gymnasium/wrappers/wrappers.py new file mode 100644 index 00000000..f7830865 --- /dev/null +++ b/mo_gymnasium/wrappers/wrappers.py @@ -0,0 +1,305 @@ +"""Wrappers.""" + +import time +from copy import deepcopy +from typing import Tuple, TypeVar + +import gymnasium as gym +import numpy as np +from gymnasium.wrappers.common import RecordEpisodeStatistics +from gymnasium.wrappers.utils import RunningMeanStd + + +ObsType = TypeVar("ObsType") +ActType = TypeVar("ActType") + + +class LinearReward(gym.Wrapper, gym.utils.RecordConstructorArgs): + """Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector.""" + + def __init__(self, env: gym.Env, weight: np.ndarray = None): + """Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector. + + Args: + env: env to wrap + weight: weight vector to use in the dot product + """ + gym.utils.RecordConstructorArgs.__init__(self, weight=weight) + gym.Wrapper.__init__(self, env) + if weight is None: + weight = np.ones(shape=env.unwrapped.reward_space.shape) + self.set_weight(weight) + + def set_weight(self, weight: np.ndarray): + """Changes weights for the scalarization. + + Args: + weight: new weights to set + Returns: nothing + """ + assert weight.shape == self.env.unwrapped.reward_space.shape, "Reward weight has different shape than reward vector." + self.w = weight + + def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: + """Steps in the environment. 
+ + Args: + action: action to perform + Returns: obs, scalarized_reward, terminated, truncated, info + """ + observation, reward, terminated, truncated, info = self.env.step(action) + scalar_reward = np.dot(reward, self.w) + info["vector_reward"] = reward + info["reward_weights"] = self.w + + return observation, scalar_reward, terminated, truncated, info + + +class MONormalizeReward(gym.Wrapper, gym.utils.RecordConstructorArgs): + """Wrapper to normalize the reward component at index idx. Does not touch other reward components. + + This code is heavily inspired on Gymnasium's except that it extracts the reward component at given idx, normalizes it, and reinjects it. + + (!) This smoothes the moving average of the reward, which can be useful for training stability. But it does not "normalize" the reward in the sense of making it have a mean of 0 and a standard deviation of 1. + + Example: + >>> import mo_gymnasium as mo_gym + >>> from mo_gymnasium.wrappers import MONormalizeReward + >>> env = mo_gym.make("deep-sea-treasure-v0") + >>> norm_treasure_env = MONormalizeReward(env, idx=0) + >>> both_norm_env = MONormalizeReward(norm_treasure_env, idx=1) + >>> both_norm_env.reset() # This one normalizes both rewards + + """ + + def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-8): + """This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. + + Args: + env (env): The environment to apply the wrapper + idx (int): the index of the reward to normalize + epsilon (float): A stability parameter + gamma (float): The discount factor that is used in the exponential moving average. + """ + gym.utils.RecordConstructorArgs.__init__(self, idx=idx, gamma=gamma, epsilon=epsilon) + gym.Wrapper.__init__(self, env) + self.idx = idx + self.return_rms = RunningMeanStd(shape=()) + self.discounted_reward: np.array = np.array([0.0]) + self.gamma = gamma + self.epsilon = epsilon + self._update_running_mean = True + + @property + def update_running_mean(self) -> bool: + """Property to freeze/continue the running mean calculation of the reward statistics.""" + return self._update_running_mean + + @update_running_mean.setter + def update_running_mean(self, setting: bool): + """Sets the property to freeze/continue the running mean calculation of the reward statistics.""" + self._update_running_mean = setting + + def step(self, action: ActType): + """Steps through the environment, normalizing the rewards returned. + + Args: + action: action to perform + Returns: obs, normalized_rewards, terminated, truncated, infos + """ + obs, rews, terminated, truncated, infos = self.env.step(action) + # Extracts the objective value to normalize + to_normalize = rews[self.idx] + + self.discounted_reward = self.discounted_reward * self.gamma * (1 - terminated) + float(to_normalize) + if self._update_running_mean: + self.return_rms.update(self.discounted_reward) + + # We don't (reward - self.return_rms.mean) see https://github.com/openai/baselines/issues/538 + normalized_reward = to_normalize / np.sqrt(self.return_rms.var + self.epsilon) + + # Injecting the normalized objective value back into the reward vector + rews[self.idx] = normalized_reward + return obs, rews, terminated, truncated, infos + + +class MOClipReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs): + """Clip reward[idx] to [min, max].""" + + def __init__(self, env: gym.Env, idx: int, min_r, max_r): + """Clip reward[idx] to [min, max]. 
+ + Args: + env: environment to wrap + idx: index of the MO reward to clip + min_r: min reward + max_r: max reward + """ + gym.utils.RecordConstructorArgs.__init__(self, idx=idx, min_r=min_r, max_r=max_r) + gym.RewardWrapper.__init__(self, env) + self.idx = idx + self.min_r = min_r + self.max_r = max_r + + def reward(self, reward): + """Clips the reward at the given index. + + Args: + reward: reward to clip. + Returns: the clipped reward. + """ + reward[self.idx] = np.clip(reward[self.idx], self.min_r, self.max_r) + return reward + + +class MORecordEpisodeStatistics(RecordEpisodeStatistics, gym.utils.RecordConstructorArgs): + """This wrapper will keep track of cumulative rewards and episode lengths. + + After the completion of an episode, ``info`` will look like this:: + + >>> info = { + ... "episode": { + ... "r": "", + ... "dr": "", + ... "l": "", + ... "t": "" + ... }, + ... } + """ + + def __init__( + self, + env: gym.Env, + gamma: float = 1.0, + buffer_length: int = 100, + stats_key: str = "episode", + ): + """This wrapper will keep track of cumulative rewards and episode lengths. + + Args: + env (Env): The environment to apply the wrapper + gamma (float): Discounting factor + buffer_length: The size of the buffers :attr:`return_queue`, :attr:`length_queue` and :attr:`time_queue` + stats_key: The info key for the episode statistics + """ + gym.utils.RecordConstructorArgs.__init__(self, gamma=gamma, buffer_length=buffer_length, stats_key=stats_key) + RecordEpisodeStatistics.__init__(self, env, buffer_length=buffer_length, stats_key=stats_key) + # CHANGE: Here we just override the standard implementation to extend to MO + self.reward_dim = self.env.unwrapped.reward_space.shape[0] + self.rewards_shape = (self.reward_dim,) + self.gamma = gamma + + def step(self, action): + """Steps through the environment, recording the episode statistics.""" + # This is very close the code from the RecordEpisodeStatistics wrapper from Gymnasium. + ( + observation, + rewards, + terminated, + truncated, + info, + ) = self.env.step(action) + assert isinstance( + info, dict + ), f"`info` dtype is {type(info)} while supported dtype is `dict`. This may be due to usage of other wrappers in the wrong order." 
+ self.episode_returns += rewards + + # CHANGE: The discounted returns are also computed here + self.disc_episode_returns += rewards * np.repeat(self.gamma**self.episode_lengths, self.reward_dim).reshape( + self.episode_returns.shape + ) + self.episode_lengths += 1 + + if terminated or truncated: + assert self._stats_key not in info + + episode_time_length = round(time.perf_counter() - self.episode_start_time, 6) + + # Make a deepcopy to void subsequent mutation of the numpy array + episode_returns = deepcopy(self.episode_returns) + disc_episode_returns = deepcopy(self.disc_episode_returns) + + info["episode"] = { + "r": episode_returns, + "dr": disc_episode_returns, + "l": self.episode_lengths, + "t": episode_time_length, + } + + self.time_queue.append(episode_time_length) + self.return_queue.append(episode_returns) + self.length_queue.append(self.episode_lengths) + + self.episode_count += 1 + self.episode_start_time = time.perf_counter() + + return ( + observation, + rewards, + terminated, + truncated, + info, + ) + + def reset(self, **kwargs): + """Resets the environment using kwargs and resets the episode returns and lengths.""" + obs, info = super().reset(**kwargs) + + # CHANGE: Here we just override the standard implementation to extend to MO + self.episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) + self.disc_episode_returns = np.zeros(self.rewards_shape, dtype=np.float32) + + return obs, info + + +class MOMaxAndSkipObservation(gym.Wrapper): + """This wrapper will return only every ``skip``-th frame (frameskipping) and return the max between the two last observations. + + Note: This wrapper is based on the wrapper from stable-baselines3: https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html#MaxAndSkipEnv + """ + + def __init__(self, env: gym.Env[ObsType, ActType], skip: int = 4): + """This wrapper will return only every ``skip``-th frame (frameskipping) and return the max between the two last frames. + + Args: + env (Env): The environment to apply the wrapper + skip: The number of frames to skip + """ + gym.Wrapper.__init__(self, env) + + if not np.issubdtype(type(skip), np.integer): + raise TypeError(f"The skip is expected to be an integer, actual type: {type(skip)}") + if skip < 2: + raise ValueError(f"The skip value needs to be equal or greater than two, actual value: {skip}") + if env.observation_space.shape is None: + raise ValueError("The observation space must have the shape attribute.") + + self._skip = skip + self._obs_buffer = np.zeros((2, *env.observation_space.shape), dtype=env.observation_space.dtype) + + def step(self, action): + """Step the environment with the given action for ``skip`` steps. + + Repeat action, sum reward, and max over last observations. 
+
+        Args:
+            action: The action to step through the environment with
+        Returns:
+            Max of the last two observations, reward, terminated, truncated, and info from the environment
+        """
+        total_reward = np.zeros(self.env.unwrapped.reward_dim, dtype=np.float32)
+        terminated = truncated = False
+        info = {}
+        for i in range(self._skip):
+            obs, reward, terminated, truncated, info = self.env.step(action)
+            done = terminated or truncated
+            if i == self._skip - 2:
+                self._obs_buffer[0] = obs
+            if i == self._skip - 1:
+                self._obs_buffer[1] = obs
+            total_reward += reward
+            if done:
+                break
+        max_frame = self._obs_buffer.max(axis=0)
+
+        return max_frame, total_reward, terminated, truncated, info
diff --git a/pyproject.toml b/pyproject.toml
index 160f2b53..d4b42d55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
 requires-python = ">= 3.8"
 authors = [{ name = "Farama Foundation", email = "contact@farama.org" }]
 license = { text = "MIT License" }
-keywords = ["Reinforcement Learning", "Multi-Objective", "RL", "AI", "gymnasium"]
+keywords = ["Reinforcement Learning", "Multi-Objective", "RL", "AI", "Gymnasium"]
 classifiers = [
     "Development Status :: 4 - Beta",  # change to `5 - Production/Stable` when ready
     "License :: OSI Approved :: MIT License",
@@ -22,8 +22,8 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 dependencies = [
-    "gymnasium>=0.28.1,<0.30",
-    "numpy >=1.21.0",
+    "gymnasium >=1.0.0",
+    "numpy >=1.21.0,<2.0",
     "pygame >=2.1.0",
     "scipy >=1.7.3",
     "pymoo >=0.6.0",
@@ -49,7 +49,7 @@ all = [
     "imageio >=2.14.1",
     "mujoco >=2.2.0",
     # highway
-    "highway-env >= 1.8",
+    "highway-env >= 1.9.1",
     # box2d
     "box2d-py ==2.3.5",
     "pygame ==2.1.3.dev8",
@@ -73,12 +73,12 @@ include = ["mo_gymnasium", "mo_gymnasium.*"]
 mo_gymnasium = [
     "**/*.json",
     "**/assets/*",
+    "py.typed"
 ]
 
 # Linters and Test tools #######################################################
 
 [tool.black]
-safe = true
 line-length = 127
 target-version = ['py38', 'py39', 'py310', 'py311']
 include = '\.pyi?$'
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 28af4b0c..568cc6b6 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -14,6 +14,7 @@ for env_spec in gym.envs.registry.values():
     if type(env_spec.entry_point) is not str:
         continue
 
+    # collect MO Gymnasium envs
     if env_spec.entry_point.split(".")[0] == "mo_gymnasium":
         all_testing_env_specs.append(env_spec)
 
@@ -27,7 +28,7 @@ def test_all_env_api(spec):
     """Check that all environments pass the environment checker."""
     env = mo_gym.make(spec.id)
 
-    env = mo_gym.LinearReward(env)
+    env = mo_gym.wrappers.LinearReward(env)
     check_env(env, skip_render_check=True)
     _test_reward_bounds(env.unwrapped)
     _test_pickle_env(env)
@@ -46,7 +47,7 @@ def test_all_env_passive_env_checker(spec):
     [
         ("MountainCar-v0", "mo-mountaincar-v0"),
         ("MountainCarContinuous-v0", "mo-mountaincarcontinuous-v0"),
-        ("LunarLander-v2", "mo-lunar-lander-v2"),
+        ("LunarLander-v3", "mo-lunar-lander-v3"),
         # ("Reacher-v4", "mo-reacher-v4"),  # use a different model and action space
         ("Hopper-v4", "mo-hopper-v4"),
         ("HalfCheetah-v4", "mo-halfcheetah-v4"),
@@ -58,7 +59,7 @@
 )
 def test_gymnasium_equivalence(gym_id, mo_gym_id, num_steps=100, seed=123):
     env = gym.make(gym_id)
-    mo_env = mo_gym.LinearReward(mo_gym.make(mo_gym_id))
+    mo_env = mo_gym.wrappers.LinearReward(mo_gym.make(mo_gym_id))
 
     # for float rewards, then precision becomes an issue
     env = gym.wrappers.TransformReward(env, lambda reward: round(reward, 4))
@@ -93,8 +94,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
 
     env_1 = mo_gym.make(env_spec.id)
     env_2 = mo_gym.make(env_spec.id)
-    env_1 = mo_gym.LinearReward(env_1)
-    env_2 = mo_gym.LinearReward(env_2)
+    env_1 = mo_gym.wrappers.LinearReward(env_1)
+    env_2 = mo_gym.wrappers.LinearReward(env_2)
 
     initial_obs_1, initial_info_1 = env_1.reset(seed=SEED)
     initial_obs_2, initial_info_2 = env_2.reset(seed=SEED)
@@ -156,7 +157,7 @@ def assert_equals(a, b, prefix=None):
         b: second data structure
         prefix: prefix for failed assertion message for types and dicts
     """
-    assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}"
+    assert type(a) is type(b), f"{prefix}Differing types: {a} and {b}"
 
     if isinstance(a, dict):
         assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}"
@@ -190,7 +191,7 @@ def test_ccs_dst():
         np.array([19.778, -17.383]),
     ]
 
-    discounted_front = env.pareto_front(gamma=0.99)
+    discounted_front = env.unwrapped.pareto_front(gamma=0.99)
     for desired, actual in zip(known_ccs, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
@@ -200,7 +201,7 @@ def test_ccs_dst_no_discount():
 
     known_ccs = mo_gym.envs.deep_sea_treasure.deep_sea_treasure.CONVEX_FRONT
 
-    discounted_front = env.pareto_front(gamma=1.0)
+    discounted_front = env.unwrapped.pareto_front(gamma=1.0)
     for desired, actual in zip(known_ccs, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
@@ -223,7 +224,7 @@ def test_concave_pf_dst():
         np.array([124.0 * gamma**18, -17.383]),
     ]
 
-    discounted_front = env.pareto_front(gamma=0.99)
+    discounted_front = env.unwrapped.pareto_front(gamma=0.99)
     for desired, actual in zip(known_pf, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
@@ -233,7 +234,7 @@ def test_concave_pf_dst_no_discount():
 
     known_pf = mo_gym.envs.deep_sea_treasure.deep_sea_treasure.CONCAVE_FRONT
 
-    discounted_front = env.pareto_front(gamma=1.0)
+    discounted_front = env.unwrapped.pareto_front(gamma=1.0)
     for desired, actual in zip(known_pf, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
@@ -244,7 +245,7 @@ def test_pf_fruit_tree():
 
     known_pf = np.array(mo_gym.envs.fruit_tree.fruit_tree.FRUITS[str(depth)]) * (0.99 ** (depth - 1))
 
-    discounted_front = env.pareto_front(gamma=0.99)
+    discounted_front = env.unwrapped.pareto_front(gamma=0.99)
     for desired, actual in zip(known_pf, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
@@ -255,6 +256,6 @@ def test_pf_fruit_tree_no_discount():
 
     known_pf = mo_gym.envs.fruit_tree.fruit_tree.FRUITS[str(depth)]
 
-    discounted_front = env.pareto_front(gamma=1.0)
+    discounted_front = env.unwrapped.pareto_front(gamma=1.0)
     for desired, actual in zip(known_pf, discounted_front):
         np.testing.assert_array_almost_equal(desired, actual, decimal=2)
diff --git a/tests/test_vector_wrappers.py b/tests/test_vector_wrappers.py
new file mode 100644
index 00000000..d57d7567
--- /dev/null
+++ b/tests/test_vector_wrappers.py
@@ -0,0 +1,89 @@
+import gymnasium as gym
+import numpy as np
+
+import mo_gymnasium as mo_gym
+from mo_gymnasium.wrappers.vector import MORecordEpisodeStatistics, MOSyncVectorEnv
+
+
+def test_mo_sync_wrapper():
+    num_envs = 3
+    envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])
+
+    envs.reset()
+    obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
+    assert len(obs) == num_envs, "Number of observations does not match the number of envs"
+    assert len(rewards) == num_envs, "Number of rewards does not match the number of envs"
+    assert len(terminateds) == num_envs, "Number of terminateds does not match the number of envs"
+    assert len(truncateds) == num_envs, "Number of truncateds does not match the number of envs"
+    envs.close()
+
+
+def test_mo_sync_autoreset():
+    num_envs = 2
+    envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])
+
+    obs, infos = envs.reset()
+    assert (obs[0] == [0, 0]).all()
+    assert (obs[1] == [0, 0]).all()
+    obs, rewards, terminateds, truncateds, infos = envs.step([0, 1])
+    assert (obs[0] == [0, 0]).all()
+    assert (obs[1] == [1, 0]).all()
+    # Use np assert almost equal to avoid floating point errors
+    np.testing.assert_almost_equal(rewards[0], np.array([0.0, -1.0], dtype=np.float32), decimal=2)
+    np.testing.assert_almost_equal(rewards[1], np.array([0.7, -1.0], dtype=np.float32), decimal=2)
+    assert not terminateds[0]
+    assert terminateds[1]  # This one is done
+    assert not truncateds[0]
+    assert not truncateds[1]
+    obs, rewards, terminateds, truncateds, infos = envs.step([0, 1])
+    assert (obs[0] == [0, 0]).all()
+    assert (obs[1] == [0, 0]).all()
+    assert (rewards[0] == [0.0, -1.0]).all()
+    assert (rewards[1] == [0.0, 0.0]).all()  # Reset step
+    assert not terminateds[0]
+    assert not terminateds[1]  # Not done anymore
+    envs.close()
+
+
+def test_mo_record_ep_statistic_vector_env():
+    num_envs = 2
+    envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])
+    envs = MORecordEpisodeStatistics(envs, gamma=0.97)
+
+    envs.reset()
+    terminateds = np.array([False] * num_envs)
+    info = {}
+    obs, rewards, terminateds, _, info = envs.step([0, 3])
+    obs, rewards, terminateds, _, info = envs.step([0, 1])
+    obs, rewards, terminateds, _, info = envs.step([0, 1])
+
+    assert isinstance(info["episode"]["r"], np.ndarray)
+    assert isinstance(info["episode"]["dr"], np.ndarray)
+    # Episode records are vectorized because there are multiple environments
+    assert info["episode"]["r"].shape == (num_envs, 2)
+    np.testing.assert_almost_equal(info["episode"]["r"][0], np.array([0.0, 0.0], dtype=np.float32), decimal=2)
+    np.testing.assert_almost_equal(info["episode"]["r"][1], np.array([8.2, -3.0], dtype=np.float32), decimal=2)
+    assert info["episode"]["dr"].shape == (num_envs, 2)
+    np.testing.assert_almost_equal(info["episode"]["dr"][0], np.array([0.0, 0.0], dtype=np.float32), decimal=2)
+    np.testing.assert_almost_equal(info["episode"]["dr"][1], np.array([7.72, -2.91], dtype=np.float32), decimal=2)
+    assert isinstance(info["episode"]["l"], np.ndarray)
+    np.testing.assert_almost_equal(info["episode"]["l"], np.array([0, 3], dtype=np.float32), decimal=2)
+    assert isinstance(info["episode"]["t"], np.ndarray)
+    envs.close()
+
+
+def test_gym_wrapper_and_vector():
+    # This tests the integration of gym-wrapped envs with MO-Gymnasium vectorized envs
+    num_envs = 2
+    envs = MOSyncVectorEnv(
+        [lambda: gym.wrappers.NormalizeObservation(mo_gym.make("deep-sea-treasure-v0")) for _ in range(num_envs)]
+    )
+
+    envs.reset()
+    for i in range(30):
+        obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
+        assert len(obs) == num_envs, "Number of observations does not match the number of envs"
+        assert len(rewards) == num_envs, "Number of rewards does not match the number of envs"
+        assert len(terminateds) == num_envs, "Number of terminateds does not match the number of envs"
+        assert len(truncateds) == num_envs, "Number of truncateds does not match the number of envs"
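+    # Closing the vectorized env also closes the wrapped sub-environments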
+    envs.close()
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index 9cf42354..df2ded4a 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -1,11 +1,10 @@
 import numpy as np
 
 import mo_gymnasium as mo_gym
-from mo_gymnasium import (
+from mo_gymnasium.wrappers import (
     MOClipReward,
     MONormalizeReward,
     MORecordEpisodeStatistics,
-    MOSyncVectorEnv,
 )
 
 
@@ -14,35 +13,42 @@ def go_to_8_3(env):
     Goes to (8.2, -3) treasure, returns the rewards
     """
     env.reset()
-    env.step(3)  # right
-    env.step(1)  # down
-    _, rewards, _, _, infos = env.step(1)
+    env.step(3)  # action: right, rewards: [0, -1]
+    env.step(1)  # action: down, rewards: [0, -1]
+    _, rewards, _, _, infos = env.step(1)  # action: down, rewards: [8.2, -1]
     return rewards, infos
 
 
 def test_normalization_wrapper():
+    # Watch out: the wrapper does not normalize the rewards to have a mean of 0 and a std of 1;
+    # instead, it scales each reward by a running estimate of the std of the discounted returns
     env = mo_gym.make("deep-sea-treasure-v0")
     norm_treasure_env = MONormalizeReward(env, idx=0)
     both_norm_env = MONormalizeReward(norm_treasure_env, idx=1)
 
+    # No normalization
+    env.reset(seed=0)
+    _, rewards, _, _, _ = env.step(1)
+    np.testing.assert_almost_equal(rewards, [0.7, -1.0], decimal=2)
+
     # Tests for both rewards normalized
     for i in range(30):
         go_to_8_3(both_norm_env)
-    both_norm_env.reset()
+    both_norm_env.reset(seed=0)
     _, rewards, _, _, _ = both_norm_env.step(1)  # down
-    np.testing.assert_allclose(rewards, [0.18, -1.24], rtol=0, atol=1e-2)
+    np.testing.assert_almost_equal(rewards, [0.5, -1.24], decimal=2)
     rewards, _ = go_to_8_3(both_norm_env)
-    np.testing.assert_allclose(rewards, [2.13, -1.24], rtol=0, atol=1e-2)
+    np.testing.assert_almost_equal(rewards, [4.73, -1.24], decimal=2)
 
     # Tests for only treasure normalized
     for i in range(30):
         go_to_8_3(norm_treasure_env)
-    norm_treasure_env.reset()
+    norm_treasure_env.reset(seed=0)
     _, rewards, _, _, _ = norm_treasure_env.step(1)  # down
     # Time rewards are not normalized (-1)
-    np.testing.assert_allclose(rewards, [0.18, -1.0], rtol=0, atol=1e-2)
+    np.testing.assert_almost_equal(rewards, [0.51, -1.0], decimal=2)
     rewards, _ = go_to_8_3(norm_treasure_env)
-    np.testing.assert_allclose(rewards, [2.13, -1.0], rtol=0, atol=1e-2)
+    np.testing.assert_almost_equal(rewards, [5.33, -1.0], decimal=2)
 
 
 def test_clip_wrapper():
@@ -66,26 +72,6 @@ def test_clip_wrapper():
     np.testing.assert_allclose(rewards, [0.5, -1.0], rtol=0, atol=1e-2)
 
 
-def test_mo_sync_wrapper():
-    def make_env(env_id):
-        def thunk():
-            env = mo_gym.make(env_id)
-            env = MORecordEpisodeStatistics(env, gamma=0.97)
-            return env
-
-        return thunk
-
-    num_envs = 3
-    envs = MOSyncVectorEnv([make_env("deep-sea-treasure-v0") for _ in range(num_envs)])
-
-    envs.reset()
-    obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
-    assert len(obs) == num_envs, "Number of observations do not match the number of envs"
-    assert len(rewards) == num_envs, "Number of rewards do not match the number of envs"
-    assert len(terminateds) == num_envs, "Number of terminateds do not match the number of envs"
-    assert len(truncateds) == num_envs, "Number of truncateds do not match the number of envs"
-
-
 def test_mo_record_ep_statistic():
     env = mo_gym.make("deep-sea-treasure-v0")
     env = MORecordEpisodeStatistics(env, gamma=0.97)
@@ -98,37 +84,9 @@ def test_mo_record_ep_statistic():
     assert info["episode"]["r"].shape == (2,)
     assert info["episode"]["dr"].shape == (2,)
     assert tuple(info["episode"]["r"]) == (np.float32(8.2), np.float32(-3.0))
-    assert tuple(np.round(info["episode"]["dr"], 2)) == (
-        np.float32(7.48),
-        np.float32(-2.82),
-    )
-    assert isinstance(info["episode"]["l"], np.int32)
+    np.testing.assert_allclose(info["episode"]["dr"], [7.71538, -2.9109], rtol=0, atol=1e-2)
+    # 0 * 0.97**0 + 0 * 0.97**1 + 8.2 * 0.97**2 == 7.71538
+    # -1 * 0.97**0 + -1 * 0.97**1 + -1 * 0.97**2 == -2.9109
+    assert isinstance(info["episode"]["l"], int)
     assert info["episode"]["l"] == 3
-    assert isinstance(info["episode"]["t"], np.float32)
-
-
-def test_mo_record_ep_statistic_vector_env():
-    def make_env(env_id):
-        def thunk():
-            env = mo_gym.make(env_id)
-            return env
-
-        return thunk
-
-    num_envs = 3
-    envs = MOSyncVectorEnv([make_env("deep-sea-treasure-v0") for _ in range(num_envs)])
-    envs = MORecordEpisodeStatistics(envs)
-
-    envs.reset()
-    terminateds = np.array([False] * num_envs)
-    info = {}
-    while not np.any(terminateds):
-        obs, rewards, terminateds, _, info = envs.step(envs.action_space.sample())
-
-    assert isinstance(info["episode"]["r"], np.ndarray)
-    assert isinstance(info["episode"]["dr"], np.ndarray)
-    # Episode records are vectorized because multiple environments
-    assert info["episode"]["r"].shape == (num_envs, 2)
-    assert info["episode"]["dr"].shape == (num_envs, 2)
-    assert isinstance(info["episode"]["l"], np.ndarray)
-    assert isinstance(info["episode"]["t"], np.ndarray)
+    assert isinstance(info["episode"]["t"], float)
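# A minimal, illustrative usage sketch of the relocated wrappers exercised by the tests above.
# It assumes the `deep-sea-treasure-v0` environment registered by mo-gymnasium; any other
# registered MO environment works the same way.
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import MONormalizeReward, MORecordEpisodeStatistics
from mo_gymnasium.wrappers.vector import MOSyncVectorEnv

# Single environment: normalize the treasure objective and record vector episode statistics.
env = mo_gym.make("deep-sea-treasure-v0")
env = MONormalizeReward(env, idx=0)
env = MORecordEpisodeStatistics(env, gamma=0.97)
obs, info = env.reset(seed=0)
obs, vector_reward, terminated, truncated, info = env.step(env.action_space.sample())

# Vectorized environments: MOSyncVectorEnv takes a list of environment thunks.
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(3)])
obs, infos = envs.reset(seed=42)
obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
envs.close()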