From 6c8e8c1f8f65dbbb28d1fce7ab3a4f62c6a7a8a6 Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Thu, 25 Jan 2024 13:55:38 -0500 Subject: [PATCH 01/21] Remove unused ansi rendering code from tictactoe, use kwargs (#1171) --- pettingzoo/classic/tictactoe/tictactoe.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py index 436d0b3cb..45d357b6f 100644 --- a/pettingzoo/classic/tictactoe/tictactoe.py +++ b/pettingzoo/classic/tictactoe/tictactoe.py @@ -99,11 +99,8 @@ def get_font(path, size): return font -def env(render_mode=None): - internal_render_mode = render_mode if render_mode != "ansi" else "human" - env = raw_env(render_mode=internal_render_mode) - if render_mode == "ansi": - env = wrappers.CaptureStdoutWrapper(env) +def env(**kwargs): + env = raw_env(**kwargs) env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1) env = wrappers.AssertOutOfBoundsWrapper(env) env = wrappers.OrderEnforcingWrapper(env) From 3f8f1bee8513581fa7d58f0e92a418b4b5c1532b Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Wed, 14 Feb 2024 10:34:12 -0500 Subject: [PATCH 02/21] Correct obs type in parallel_rps doc example (#1170) --- docs/code_examples/parallel_rps.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/code_examples/parallel_rps.py b/docs/code_examples/parallel_rps.py index 383659666..bf634af10 100644 --- a/docs/code_examples/parallel_rps.py +++ b/docs/code_examples/parallel_rps.py @@ -1,6 +1,7 @@ import functools import gymnasium +import numpy as np from gymnasium.spaces import Discrete from pettingzoo import ParallelEnv @@ -9,7 +10,7 @@ ROCK = 0 PAPER = 1 SCISSORS = 2 -NONE = 3 +NO_MOVE = 3 MOVES = ["ROCK", "PAPER", "SCISSORS", "None"] NUM_ITERS = 100 REWARD_MAP = { @@ -83,6 +84,7 @@ def __init__(self, render_mode=None): @functools.lru_cache(maxsize=None) def observation_space(self, agent): # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/ + # Discrete(4) means an integer in range(0, 4) return Discrete(4) # Action space should be defined here. 
@@ -128,7 +130,8 @@ def reset(self, seed=None, options=None): """ self.agents = self.possible_agents[:] self.num_moves = 0 - observations = {agent: NONE for agent in self.agents} + # the observations should be numpy arrays even if there is only one value + observations = {agent: np.array(NO_MOVE) for agent in self.agents} infos = {agent: {} for agent in self.agents} self.state = observations @@ -161,9 +164,11 @@ def step(self, actions): env_truncation = self.num_moves >= NUM_ITERS truncations = {agent: env_truncation for agent in self.agents} - # current observation is just the other player's most recent action + # Current observation is just the other player's most recent action + # This is converted to a numpy value of type int to match the type + # that we declared in observation_space() observations = { - self.agents[i]: int(actions[self.agents[1 - i]]) + self.agents[i]: np.array(actions[self.agents[1 - i]], dtype=np.int64) for i in range(len(self.agents)) } self.state = observations From 616fc86eb45f61be8985da7eafc8f2ad1c53c0d6 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Thu, 15 Feb 2024 16:25:07 -0500 Subject: [PATCH 03/21] Add compatibility with pytest version 8 (#1177) --- docs/content/basic_usage.md | 3 ++- docs/content/environment_tests.md | 8 +++++-- docs/environments/atari.md | 5 ++++- docs/index.md | 21 ++++++++++++------- .../knights_archers_zombies.py | 1 + pettingzoo/utils/deprecated_module.py | 14 +++++++++++-- pyproject.toml | 15 +++++++------ tutorials/AgileRL/requirements.txt | 2 +- 8 files changed, 47 insertions(+), 22 deletions(-) diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md index 75110efce..ad7589919 100644 --- a/docs/content/basic_usage.md +++ b/docs/content/basic_usage.md @@ -148,5 +148,6 @@ In certain cases, separating agent from environment actions is helpful for study Environments are by default wrapped in a handful of lightweight wrappers that handle error messages and ensure reasonable behavior given incorrect usage (i.e. playing illegal moves or stepping before resetting). However, these add a very small amount of overhead. If you want to create an environment without them, you can do so by using the `raw_env()` constructor contained within each module: ``` python -env = knights_archers_zombies_v10.raw_env() +environment_parameters = {} # any parameters to pass to the environment +env = knights_archers_zombies_v10.raw_env(**environment_parameters) ``` diff --git a/docs/content/environment_tests.md b/docs/content/environment_tests.md index 369ffe98f..a7cfd1d46 100644 --- a/docs/content/environment_tests.md +++ b/docs/content/environment_tests.md @@ -80,10 +80,14 @@ render_test(env_func) The render test method takes in an optional argument `custom_tests` that allows for additional tests in non-standard modes. 
``` python +from pettingzoo.test import render_test +from pettingzoo.butterfly import pistonball_v6 +env_func = pistonball_v6.env + custom_tests = { - "svg": lambda render_result: return isinstance(render_result, str) + "svg": lambda render_result: isinstance(render_result, str) } -render_test(env, custom_tests=custom_tests) +render_test(env_func, custom_tests=custom_tests) ``` ## Performance Benchmark Test diff --git a/docs/environments/atari.md b/docs/environments/atari.md index a00a55cfa..b3f31734e 100644 --- a/docs/environments/atari.md +++ b/docs/environments/atari.md @@ -121,7 +121,10 @@ env = supersuit.frame_stack_v1(env, 4) All the Atari environments have the following environment parameters: ``` python -.env(obs_type='rgb_image', full_action_space=True, max_cycles=100000, auto_rom_install_path=None) +# using space invaders as an example, but replace with any atari game +from pettingzoo.atari import space_invaders_v2 + +space_invaders_v2.env(obs_type='rgb_image', full_action_space=True, max_cycles=100000, auto_rom_install_path=None) ``` `obs_type`: There are three possible values for this parameter: diff --git a/docs/index.md b/docs/index.md index 154d283bf..d5d0694ba 100644 --- a/docs/index.md +++ b/docs/index.md @@ -78,11 +78,18 @@ The [AEC API](/api/aec/) supports sequential turn based environments, while the Environments can be interacted with using a similar interface to [Gymnasium](https://gymnasium.farama.org): ```python - from pettingzoo.butterfly import knights_archers_zombies_v10 - env = knights_archers_zombies_v10.env(render_mode="human") - env.reset(seed=42) - for agent in env.agent_iter(): - observation, reward, termination, truncation, info = env.last() - action = policy(observation, agent) - env.step(action) +from pettingzoo.butterfly import knights_archers_zombies_v10 +env = knights_archers_zombies_v10.env(render_mode="human") +env.reset(seed=42) + +for agent in env.agent_iter(): + observation, reward, termination, truncation, info = env.last() + + if termination or truncation: + action = None + else: + # this is where you would insert your policy + action = env.action_space(agent).sample() + + env.step(action) ``` diff --git a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py index f81381a5c..0f21753e6 100644 --- a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py +++ b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py @@ -132,6 +132,7 @@ vector_state=True, use_typemasks=False, sequence_space=False, +) ``` `spawn_rate`: how many cycles before a new zombie is spawned. A lower number means zombies are spawned at a higher rate. diff --git a/pettingzoo/utils/deprecated_module.py b/pettingzoo/utils/deprecated_module.py index 423ac5e6b..49d7b4537 100644 --- a/pettingzoo/utils/deprecated_module.py +++ b/pettingzoo/utils/deprecated_module.py @@ -37,7 +37,17 @@ def deprecated_handler( # It wasn't able to find this module # You should do your deprecation notice here. if not is_env(env_name): - raise ImportError(f"cannot import name '{env_name}' from '{module_name}'") + # Although this seems like an import error, it needs to be an + # AttributeError because it is the failure to find the + # 'env_name' attribute in module_name. + # The distinction is important because this function is used in + # a __getattr__() function to get modules. 
Raising an error + # other than AttributeError will break the default value handling + # in a call like: getattr(obj, "key", default="value") + # Pytest uses that and will fail if this isn't an AttributeError + raise AttributeError( + f"cannot import name '{env_name}' from '{module_name}'" + ) name, version = env_name.rsplit("_v") for loader, alt_env_name, is_pkg in pkgutil.iter_modules(module_path): @@ -47,7 +57,7 @@ def deprecated_handler( if int(alt_version) > int(version): return DeprecatedModule(name, version, alt_version) else: - raise ImportError( + raise AttributeError( f"cannot import name '{env_name}' from '{module_name}'" ) diff --git a/pyproject.toml b/pyproject.toml index 5c59c9654..c0160ab17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,14 +43,13 @@ mpe = ["pygame==2.3.0"] sisl = ["pygame==2.3.0", "pymunk==6.2.0", "box2d-py==2.3.5", "scipy>=1.4.1"] other = ["pillow>=8.0.1"] testing = [ - "pynput", - "pytest", - "AutoROM", - "pytest", - "pytest-cov", - "pytest-xdist", - "pre-commit", - "pytest-markdown-docs" + "pynput==1.7.6", + "pytest==8.0.0", + "AutoROM==0.6.1", + "pytest-cov==4.1.0", + "pytest-xdist==3.5.0", + "pre-commit==3.5.0", + "pytest-markdown-docs==0.5.0" ] all = [ "multi_agent_ale_py==0.1.11", diff --git a/tutorials/AgileRL/requirements.txt b/tutorials/AgileRL/requirements.txt index 13459f060..4a57a7af2 100644 --- a/tutorials/AgileRL/requirements.txt +++ b/tutorials/AgileRL/requirements.txt @@ -1,4 +1,4 @@ -agilerl>=0.1.16 +agilerl==0.1.19 pettingzoo[classic,atari,mpe]>=1.23.1 SuperSuit>=3.9.0 torch>=2.0.1 From 9f8c1a8ac49219319c2d3d2772297ea567083f21 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Mon, 26 Feb 2024 18:29:12 +0200 Subject: [PATCH 04/21] `custom_environment.index.md`: typo fix parallel API link (#1183) --- docs/tutorials/custom_environment/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/custom_environment/index.md b/docs/tutorials/custom_environment/index.md index 77c9a8b5e..7799b3214 100644 --- a/docs/tutorials/custom_environment/index.md +++ b/docs/tutorials/custom_environment/index.md @@ -14,7 +14,7 @@ These tutorials walk you though the full process of creating a custom environmen 4. [Testing Your Environment](/tutorials/custom_environment/4-testing-your-environment.md) -For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/aec/) implementations, see our [Environment Creation](/content/environment_creation/) documentation. +For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/parallel/) implementations, see our [Environment Creation](/content/environment_creation/) documentation. 
```{toctree} From e812151005c747709d80bc87cedf462ff732c2d3 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Mon, 26 Feb 2024 11:30:14 -0500 Subject: [PATCH 05/21] Remove unneeded Maze Craze warning (#1164) --- pettingzoo/atari/maze_craze/maze_craze.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pettingzoo/atari/maze_craze/maze_craze.py b/pettingzoo/atari/maze_craze/maze_craze.py index 5c3b77a87..88cd4d710 100644 --- a/pettingzoo/atari/maze_craze/maze_craze.py +++ b/pettingzoo/atari/maze_craze/maze_craze.py @@ -85,7 +85,6 @@ """ import os -import warnings from glob import glob from pettingzoo.atari.base_atari_env import ( @@ -102,10 +101,6 @@ def raw_env(game_version="robbers", visibilty_level=0, **kwargs): - if game_version == "robbers" and visibilty_level == 0: - warnings.warn( - "maze_craze has different versions of the game via the `game_version` argument, consider overriding." - ) assert ( game_version in avaliable_versions ), f"`game_version` parameter must be one of {avaliable_versions.keys()}" From dffd61ab4a3317c16e09d1d2e19ba3574ecf3003 Mon Sep 17 00:00:00 2001 From: Nick Ustaran-Anderegg <47857277+nicku-a@users.noreply.github.com> Date: Wed, 13 Mar 2024 16:17:40 +0000 Subject: [PATCH 06/21] Update agilerl version and simplified CNN definition (#1182) Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> --- tutorials/AgileRL/agilerl_maddpg.py | 7 ++++--- tutorials/AgileRL/agilerl_matd3.py | 3 ++- tutorials/AgileRL/requirements.txt | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tutorials/AgileRL/agilerl_maddpg.py b/tutorials/AgileRL/agilerl_maddpg.py index 902d6125f..14a93f6e1 100644 --- a/tutorials/AgileRL/agilerl_maddpg.py +++ b/tutorials/AgileRL/agilerl_maddpg.py @@ -23,8 +23,8 @@ NET_CONFIG = { "arch": "cnn", # Network architecture "h_size": [32, 32], # Network hidden size - "c_size": [3, 32], # CNN channel size - "k_size": [(1, 3, 3), (1, 3, 3)], # CNN kernel size + "c_size": [32, 32], # CNN channel size + "k_size": [3, 3], # CNN kernel size "s_size": [2, 2], # CNN stride size "normalize": True, # Normalize image from range [0,255] to [0,1] } @@ -36,7 +36,8 @@ # Swap image channels dimension from last to first [H, W, C] -> [C, H, W] "CHANNELS_LAST": True, "BATCH_SIZE": 8, # Batch size - "LR": 0.01, # Learning rate + "LR_ACTOR": 0.001, # Actor learning rate + "LR_CRITIC": 0.01, # Critic learning rate "GAMMA": 0.95, # Discount factor "MEMORY_SIZE": 10000, # Max memory buffer size "LEARN_STEP": 5, # Learning frequency diff --git a/tutorials/AgileRL/agilerl_matd3.py b/tutorials/AgileRL/agilerl_matd3.py index f44db84bb..46aefb5bf 100644 --- a/tutorials/AgileRL/agilerl_matd3.py +++ b/tutorials/AgileRL/agilerl_matd3.py @@ -31,7 +31,8 @@ # Swap image channels dimension from last to first [H, W, C] -> [C, H, W] "CHANNELS_LAST": False, "BATCH_SIZE": 32, # Batch size - "LR": 0.01, # Learning rate + "LR_ACTOR": 0.001, # Actor learning rate + "LR_CRITIC": 0.01, # Critic learning rate "GAMMA": 0.95, # Discount factor "MEMORY_SIZE": 100000, # Max memory buffer size "LEARN_STEP": 5, # Learning frequency diff --git a/tutorials/AgileRL/requirements.txt b/tutorials/AgileRL/requirements.txt index 4a57a7af2..d5eb552e4 100644 --- a/tutorials/AgileRL/requirements.txt +++ b/tutorials/AgileRL/requirements.txt @@ -1,4 +1,5 @@ -agilerl==0.1.19 +agilerl>=0.1.21; python_version >= '3.9' +agilerl==0.1.20; python_version < '3.9' pettingzoo[classic,atari,mpe]>=1.23.1 SuperSuit>=3.9.0 torch>=2.0.1 From 
9185ac91269241f9a8e06638284785ebc0b264ab Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 13 Mar 2024 12:29:56 -0400 Subject: [PATCH 07/21] Fix obs space documentation in simple_adversary.py (#1189) --- pettingzoo/mpe/simple_adversary/simple_adversary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pettingzoo/mpe/simple_adversary/simple_adversary.py b/pettingzoo/mpe/simple_adversary/simple_adversary.py index 9f2e18c88..674790c38 100644 --- a/pettingzoo/mpe/simple_adversary/simple_adversary.py +++ b/pettingzoo/mpe/simple_adversary/simple_adversary.py @@ -28,7 +28,7 @@ target landmark, but negatively rewarded based on how close the adversary is to the target landmark. The adversary is rewarded based on distance to the target, but it doesn't know which landmark is the target landmark. All rewards are unscaled Euclidean distance (see main MPE documentation for average distance). This means good agents have to learn to 'split up' and cover all landmarks to deceive the adversary. -Agent observation space: `[self_pos, self_vel, goal_rel_position, landmark_rel_position, other_agent_rel_positions]` +Agent observation space: `[goal_rel_position, landmark_rel_position, other_agent_rel_positions]` Adversary observation space: `[landmark_rel_position, other_agents_rel_positions]` From 4c90946a98bc860d9fa8afac5cec080a9ba5afbe Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 13 Mar 2024 16:11:58 -0400 Subject: [PATCH 08/21] Add comments about necessary linux packages in README (#1191) --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ca0ee9949..395c9cbe9 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ To install the dependencies for one family, use `pip install 'pettingzoo[atari]' We support Python 3.8, 3.9, 3.10 and 3.11 on Linux and macOS. We will accept PRs related to Windows, but do not officially support it. +Note: Some Linux distributions may require manual installation of `cmake`, `swig`, or `zlib1g-dev` (e.g., `sudo apt install cmake swig zlib1g-dev`) + ## Getting started For an introduction to PettingZoo, see [Basic Usage](https://pettingzoo.farama.org/content/basic_usage/). To create a new environment, see our [Environment Creation Tutorial](https://pettingzoo.farama.org/tutorials/custom_environment/1-project-structure/) and [Custom Environment Examples](https://pettingzoo.farama.org/content/environment_creation/). 
From 66097633f51510959b2c0212b9054ab9eff08cdd Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Thu, 14 Mar 2024 12:19:45 -0400 Subject: [PATCH 09/21] Fix MacOS pygame no hardware accelerated device error (#1190) --- .github/workflows/macos-test.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/macos-test.yml b/.github/workflows/macos-test.yml index b89be000f..82e7a3d15 100644 --- a/.github/workflows/macos-test.yml +++ b/.github/workflows/macos-test.yml @@ -3,25 +3,33 @@ name: MacOS tests on: push: - branches: [master] + branches: [none] permissions: contents: read jobs: macos-test: - runs-on: macos-11 + runs-on: ${{ matrix.os }} + strategy: + matrix: + # Big Sur, Monterey + os: [macos-11, macos-12] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 - - name: Set up Python 3.11 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: - python-version: 3.11 + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | pip install -e .[all] pip install -e .[testing] AutoROM -v + - name: Set dummy SDL video driver + run: | + export SDL_VIDEODRIVER=dummy - name: Full Python tests run: | pytest -v --cov=pettingzoo --cov-report term From b19c02c8734921e4478c53fc994cb344d5631de2 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Wed, 20 Mar 2024 02:02:51 -0400 Subject: [PATCH 10/21] Pin AgileRL version (#1193) --- tutorials/AgileRL/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/AgileRL/requirements.txt b/tutorials/AgileRL/requirements.txt index d5eb552e4..dbdd050a0 100644 --- a/tutorials/AgileRL/requirements.txt +++ b/tutorials/AgileRL/requirements.txt @@ -1,4 +1,4 @@ -agilerl>=0.1.21; python_version >= '3.9' +agilerl==0.1.21; python_version >= '3.9' agilerl==0.1.20; python_version < '3.9' pettingzoo[classic,atari,mpe]>=1.23.1 SuperSuit>=3.9.0 From 0cdf49eeda99d07bb524278869484456deecd107 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Fri, 22 Mar 2024 10:57:02 -0400 Subject: [PATCH 11/21] Fix test to handle nested observation dicts (#1172) --- pettingzoo/test/api_test.py | 52 ++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/pettingzoo/test/api_test.py b/pettingzoo/test/api_test.py index f8718579c..558123b6c 100644 --- a/pettingzoo/test/api_test.py +++ b/pettingzoo/test/api_test.py @@ -3,6 +3,7 @@ import re import warnings from collections import defaultdict +from typing import Any import gymnasium import numpy as np @@ -383,6 +384,46 @@ def test_rewards_terminations_truncations(env, agent_0): test_reward(env.rewards[agent]) +def _test_observation_space_compatibility( + expected: gymnasium.spaces.Space[Any], + seen: gymnasium.spaces.Space[Any] | dict, + recursed_keys: list[str], +) -> None: + """Ensure observation's dtypes are same as in observation_space. + + This tests that the dtypes of the spaces are the same. + The function will recursively check observation dicts to ensure that + all components have the same dtype as declared in the observation space. + + Args: + expected: Observation space that is expected. + seen: The observation actually seen. + recursed_keys: A list of all the dict keys that led to the current + observations. This enables a more helpful error message if + an assert fails. The initial call should have an empty list. 
+ """ + if isinstance(expected, gymnasium.spaces.Dict): + for key in expected.keys(): + if not recursed_keys and key != "observation": + # For the top level, we only care about the 'observation' key. + continue + # We know a dict is expected. Anything else is an error. + assert isinstance( + seen, dict + ), f"observation at [{']['.join(recursed_keys)}] is {seen.dtype}, but expected dict." + + # note: a previous test (expected.contains(seen)) ensures that + # the two dicts have the same keys. + _test_observation_space_compatibility( + expected[key], seen[key], recursed_keys + [key] + ) + else: + # done recursing, now the actual space types should match + assert ( + expected.dtype == seen.dtype + ), f"dtype for observation at [{']['.join(recursed_keys)}] is {seen.dtype}, but observation space specifies {expected.dtype}." + + def play_test(env, observation_0, num_cycles): """ plays through environment and does dynamic checks to make @@ -466,13 +507,10 @@ def play_test(env, observation_0, num_cycles): prev_observe ), "Out of bounds observation: " + str(prev_observe) - if isinstance(env.observation_space(agent), gymnasium.spaces.Box): - assert env.observation_space(agent).dtype == prev_observe.dtype - elif isinstance(env.observation_space(agent), gymnasium.spaces.Dict): - assert ( - env.observation_space(agent)["observation"].dtype - == prev_observe["observation"].dtype - ) + _test_observation_space_compatibility( + env.observation_space(agent), prev_observe, recursed_keys=[] + ) + test_observation(prev_observe, observation_0, str(env.unwrapped)) if not isinstance(env.infos[env.agent_selection], dict): warnings.warn( From e93fd91e733c02f4b65916489c4f88bce40b27f0 Mon Sep 17 00:00:00 2001 From: Michael Pratt <118982716+mikepratt1@users.noreply.github.com> Date: Tue, 26 Mar 2024 22:41:10 +0000 Subject: [PATCH 12/21] Agilerl updates (#1196) --- .github/workflows/linux-tutorials-test.yml | 27 ++++++++++++++++++++- docs/tutorials/agilerl/DQN.md | 8 +++--- tutorials/AgileRL/agilerl_dqn_curriculum.py | 8 +++--- tutorials/AgileRL/agilerl_maddpg.py | 8 +++--- tutorials/AgileRL/agilerl_matd3.py | 2 +- tutorials/AgileRL/requirements.txt | 3 +-- 6 files changed, 40 insertions(+), 16 deletions(-) diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml index 858203d13..f74a9b3c5 100644 --- a/.github/workflows/linux-tutorials-test.yml +++ b/.github/workflows/linux-tutorials-test.yml @@ -17,7 +17,32 @@ jobs: fail-fast: false matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] - tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test, AgileRL] # TODO: fix tutorials and add back Ray + tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test] # TODO: fix tutorials and add back Ray + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies and run tutorials + run: | + sudo apt-get install python3-opengl xvfb parallel + export PATH=/path/to/parallel:$PATH + export root_dir=$(pwd) + cd tutorials/${{ matrix.tutorial }} + pip install -r requirements.txt + pip uninstall -y pettingzoo + pip install -e $root_dir[testing] + AutoROM -v + for f in *.py; do xvfb-run -a -s "-screen 0 1024x768x24" python "$f"; done + + agilerl-tutorial-test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.9', '3.10', 
'3.11'] + tutorial: [AgileRL] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/docs/tutorials/agilerl/DQN.md b/docs/tutorials/agilerl/DQN.md index 6a7cc3731..5b701f28c 100644 --- a/docs/tutorials/agilerl/DQN.md +++ b/docs/tutorials/agilerl/DQN.md @@ -612,10 +612,10 @@ Before we go any further in this tutorial, it would be helpful to define and set # Define the network configuration NET_CONFIG = { "arch": "cnn", # Network architecture - "h_size": [64, 64], # Actor hidden size - "c_size": [128], # CNN channel size - "k_size": [4], # CNN kernel size - "s_size": [1], # CNN stride size + "hidden_size": [64, 64], # Actor hidden size + "channel_size": [128], # CNN channel size + "kernel_size": [4], # CNN kernel size + "stride_size": [1], # CNN stride size "normalize": False, # Normalize image from range [0,255] to [0,1] } diff --git a/tutorials/AgileRL/agilerl_dqn_curriculum.py b/tutorials/AgileRL/agilerl_dqn_curriculum.py index 1b6e86949..a56464c5f 100644 --- a/tutorials/AgileRL/agilerl_dqn_curriculum.py +++ b/tutorials/AgileRL/agilerl_dqn_curriculum.py @@ -494,10 +494,10 @@ def outcome(self, action, player, return_length=False): # Define the network configuration NET_CONFIG = { "arch": "cnn", # Network architecture - "h_size": [64, 64], # Actor hidden size - "c_size": [128], # CNN channel size - "k_size": [4], # CNN kernel size - "s_size": [1], # CNN stride size + "hidden_size": [64, 64], # Actor hidden size + "channel_size": [128], # CNN channel size + "kernel_size": [4], # CNN kernel size + "stride_size": [1], # CNN stride size "normalize": False, # Normalize image from range [0,255] to [0,1] } diff --git a/tutorials/AgileRL/agilerl_maddpg.py b/tutorials/AgileRL/agilerl_maddpg.py index 14a93f6e1..37e193f40 100644 --- a/tutorials/AgileRL/agilerl_maddpg.py +++ b/tutorials/AgileRL/agilerl_maddpg.py @@ -22,10 +22,10 @@ # Define the network configuration NET_CONFIG = { "arch": "cnn", # Network architecture - "h_size": [32, 32], # Network hidden size - "c_size": [32, 32], # CNN channel size - "k_size": [3, 3], # CNN kernel size - "s_size": [2, 2], # CNN stride size + "hidden_size": [32, 32], # Network hidden size + "channel_size": [32, 32], # CNN channel size + "kernel_size": [3, 3], # CNN kernel size + "stride_size": [2, 2], # CNN stride size "normalize": True, # Normalize image from range [0,255] to [0,1] } diff --git a/tutorials/AgileRL/agilerl_matd3.py b/tutorials/AgileRL/agilerl_matd3.py index 46aefb5bf..cc6ed9009 100644 --- a/tutorials/AgileRL/agilerl_matd3.py +++ b/tutorials/AgileRL/agilerl_matd3.py @@ -21,7 +21,7 @@ # Define the network configuration NET_CONFIG = { "arch": "mlp", # Network architecture - "h_size": [32, 32], # Actor hidden size + "hidden_size": [32, 32], # Actor hidden size } # Define the initial hyperparameters diff --git a/tutorials/AgileRL/requirements.txt b/tutorials/AgileRL/requirements.txt index dbdd050a0..1262ee83c 100644 --- a/tutorials/AgileRL/requirements.txt +++ b/tutorials/AgileRL/requirements.txt @@ -1,5 +1,4 @@ -agilerl==0.1.21; python_version >= '3.9' -agilerl==0.1.20; python_version < '3.9' +agilerl==0.1.22; python_version >= '3.9' pettingzoo[classic,atari,mpe]>=1.23.1 SuperSuit>=3.9.0 torch>=2.0.1 From 45f7f144c183d27098a4e507cd7cd01609d7e1ac Mon Sep 17 00:00:00 2001 From: Anthony Chang <40710895+Bamboofungus@users.noreply.github.com> Date: Sat, 30 Mar 2024 00:30:52 +0900 Subject: [PATCH 13/21] Replace images that weren't rendering correctly with inline LaTeX (#1198) --- pettingzoo/classic/go/go.py | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pettingzoo/classic/go/go.py b/pettingzoo/classic/go/go.py index 3360f520e..ac1749019 100644 --- a/pettingzoo/classic/go/go.py +++ b/pettingzoo/classic/go/go.py @@ -81,14 +81,14 @@ | Action ID | Description | | :----------------------------------------------------------: | ------------------------------------------------------------ | -| | Place a stone on the 1st row of the board.
_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ | -| | Place a stone on the 2nd row of the board.
_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ | +| $0 \ldots (N-1)$ | Place a stone on the 1st row of the board.
_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ | +| $N \ldots (2N-1)$ | Place a stone on the 2nd row of the board.
_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ | | ... | ... | -| | Place a stone on the Nth row of the board.
_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ | -| | Pass | +| $(N^2-N) \ldots (N^2-1)$ | Place a stone on the Nth row of the board.
_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ | +| $N^2$ | Pass | -For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)` The total action space is -. +For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)`. The total action space is +$N^2+1$. ### Rewards From 849414dfcba7f423f4db02bfecc7ef061cccc567 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:59:15 -0400 Subject: [PATCH 14/21] Rename class agent_selector -> AgentSelector (#1194) --- docs/api/utils.md | 2 +- docs/code_examples/aec_rps.py | 6 ++--- docs/content/environment_creation.md | 6 ++--- .../cooperative_pong/cooperative_pong.py | 4 ++-- .../knights_archers_zombies.py | 4 ++-- pettingzoo/butterfly/pistonball/pistonball.py | 4 ++-- pettingzoo/classic/chess/chess.py | 6 ++--- .../classic/connect_four/connect_four.py | 4 ++-- pettingzoo/classic/go/go.py | 4 ++-- pettingzoo/classic/hanabi/hanabi.py | 4 ++-- pettingzoo/classic/rps/rps.py | 4 ++-- pettingzoo/classic/tictactoe/tictactoe.py | 4 ++-- pettingzoo/mpe/_mpe_utils/simple_env.py | 4 ++-- pettingzoo/sisl/multiwalker/multiwalker.py | 4 ++-- pettingzoo/sisl/pursuit/pursuit.py | 4 ++-- pettingzoo/sisl/waterworld/waterworld.py | 4 ++-- ...enerated_agents_env_action_mask_info_v0.py | 4 ++-- ...generated_agents_env_action_mask_obs_v0.py | 4 ++-- .../generated_agents_env_cust_agentid_v0.py | 4 ++-- .../example_envs/generated_agents_env_v0.py | 4 ++-- pettingzoo/utils/__init__.py | 2 +- pettingzoo/utils/agent_selector.py | 22 ++++++++++++++----- pettingzoo/utils/conversions.py | 6 ++--- 23 files changed, 63 insertions(+), 51 deletions(-) diff --git a/docs/api/utils.md b/docs/api/utils.md index abc9d01fc..0b0e319cb 100644 --- a/docs/api/utils.md +++ b/docs/api/utils.md @@ -165,7 +165,7 @@ Base class which is used by [CaptureStdoutWrapper](https://pettingzoo.farama.org The agent selector utility allows for easy cycling of agents in an AEC environment. At any time it can be reset or reinitialized with a new order, allowing for changes in turn order or handling a dynamic number of agents (see [Knights-Archers-Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) for an example of spawning/killing agents) -Note: while many PettingZoo environments use agent_selector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)). +Note: while many PettingZoo environments use AgentSelector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)). ```{eval-rst} .. 
currentmodule:: pettingzoo.utils diff --git a/docs/code_examples/aec_rps.py b/docs/code_examples/aec_rps.py index 7ae982167..7272f75bd 100644 --- a/docs/code_examples/aec_rps.py +++ b/docs/code_examples/aec_rps.py @@ -5,7 +5,7 @@ from gymnasium.spaces import Discrete from pettingzoo import AECEnv -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers ROCK = 0 PAPER = 1 @@ -156,9 +156,9 @@ def reset(self, seed=None, options=None): self.observations = {agent: NONE for agent in self.agents} self.num_moves = 0 """ - Our agent_selector utility allows easy cyclic stepping through the agents list. + Our AgentSelector utility allows easy cyclic stepping through the agents list. """ - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.next() def step(self, action): diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md index 8b4332872..4347c49c7 100644 --- a/docs/content/environment_creation.md +++ b/docs/content/environment_creation.md @@ -62,14 +62,14 @@ The utils directory also contain some classes which are only helpful for develop ### Agent selector -The `agent_selector` class steps through agents in a cycle +The `AgentSelector` class steps through agents in a cycle It can be used as follows to cycle through the list of agents: ```python -from pettingzoo.utils import agent_selector +from pettingzoo.utils import AgentSelector agents = ["agent_1", "agent_2", "agent_3"] -selector = agent_selector(agents) +selector = AgentSelector(agents) agent_selection = selector.reset() # agent_selection will be "agent_1" for i in range(100): diff --git a/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py b/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py index 0751a12e7..4573769fc 100644 --- a/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py +++ b/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py @@ -79,7 +79,7 @@ from pettingzoo.butterfly.cooperative_pong.manual_policy import ManualPolicy from pettingzoo.butterfly.cooperative_pong.paddle import Paddle from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector from pettingzoo.utils.conversions import parallel_wrapper_fn FPS = 15 @@ -370,7 +370,7 @@ def __init__(self, **kwargs): self.agents = self.env.agents[:] self.possible_agents = self.agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py index 0f21753e6..68a9bdfdc 100644 --- a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py +++ b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py @@ -194,7 +194,7 @@ from pettingzoo.butterfly.knights_archers_zombies.src.players import Archer, Knight from pettingzoo.butterfly.knights_archers_zombies.src.weapons import Arrow, Sword from pettingzoo.butterfly.knights_archers_zombies.src.zombie import Zombie -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn 
sys.dont_write_bytecode = True @@ -370,7 +370,7 @@ def __init__( self.floor_patch3 = get_image(os.path.join("img", "patch3.png")) self.floor_patch4 = get_image(os.path.join("img", "patch4.png")) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.reinit() def observation_space(self, agent): diff --git a/pettingzoo/butterfly/pistonball/pistonball.py b/pettingzoo/butterfly/pistonball/pistonball.py index b15ea2872..65415593b 100644 --- a/pettingzoo/butterfly/pistonball/pistonball.py +++ b/pettingzoo/butterfly/pistonball/pistonball.py @@ -89,7 +89,7 @@ from pettingzoo import AECEnv from pettingzoo.butterfly.pistonball.manual_policy import ManualPolicy -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn _image_library = {} @@ -180,7 +180,7 @@ def __init__( self.agents = ["piston_" + str(r) for r in range(self.n_pistons)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, list(range(self.n_pistons)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.observation_spaces = dict( zip( diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py index 5100f8fc3..81b2ccb31 100644 --- a/pettingzoo/classic/chess/chess.py +++ b/pettingzoo/classic/chess/chess.py @@ -116,7 +116,7 @@ from pettingzoo import AECEnv from pettingzoo.classic.chess import chess_utils from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(**kwargs): @@ -144,7 +144,7 @@ def __init__(self, render_mode: str | None = None, screen_height: int | None = 8 self.agents = [f"player_{i}" for i in range(2)] self.possible_agents = self.agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.action_spaces = {name: spaces.Discrete(8 * 8 * 73) for name in self.agents} self.observation_spaces = { @@ -238,7 +238,7 @@ def reset(self, seed=None, options=None): self.board = chess.Board() - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() self.rewards = {name: 0 for name in self.agents} diff --git a/pettingzoo/classic/connect_four/connect_four.py b/pettingzoo/classic/connect_four/connect_four.py index e2a2390e9..48ce61ce1 100644 --- a/pettingzoo/classic/connect_four/connect_four.py +++ b/pettingzoo/classic/connect_four/connect_four.py @@ -69,7 +69,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def get_image(path): @@ -220,7 +220,7 @@ def reset(self, seed=None, options=None): self.truncations = {i: False for i in self.agents} self.infos = {i: {} for i in self.agents} - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() diff --git a/pettingzoo/classic/go/go.py b/pettingzoo/classic/go/go.py index ac1749019..d9a865c67 100644 --- a/pettingzoo/classic/go/go.py +++ b/pettingzoo/classic/go/go.py @@ -119,7 +119,7 @@ from pettingzoo import AECEnv from pettingzoo.classic.go import coords, go_base from pettingzoo.utils import wrappers -from 
pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def get_image(path): @@ -191,7 +191,7 @@ def __init__( [spaces.Discrete(self._N * self._N + 1) for _ in range(self.num_agents)] ) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.board_history = np.zeros((self._N, self._N, 16), dtype=bool) diff --git a/pettingzoo/classic/hanabi/hanabi.py b/pettingzoo/classic/hanabi/hanabi.py index bd2f7480f..bd4441401 100644 --- a/pettingzoo/classic/hanabi/hanabi.py +++ b/pettingzoo/classic/hanabi/hanabi.py @@ -171,7 +171,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(**kwargs): @@ -441,7 +441,7 @@ def reset(self, seed=None, options=None): self.truncations = self.hanabi_env.truncations self.infos = self.hanabi_env.infos - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() def step( diff --git a/pettingzoo/classic/rps/rps.py b/pettingzoo/classic/rps/rps.py index 1b9eb6ad6..83c5abb3f 100644 --- a/pettingzoo/classic/rps/rps.py +++ b/pettingzoo/classic/rps/rps.py @@ -121,7 +121,7 @@ from gymnasium.utils import EzPickle from pettingzoo import AECEnv -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -419,7 +419,7 @@ def close(self): def reset(self, seed=None, options=None): self.agents = self.possible_agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.next() self.rewards = {agent: 0 for agent in self.agents} self._cumulative_rewards = {agent: 0 for agent in self.agents} diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py index 45d357b6f..e68f900a8 100644 --- a/pettingzoo/classic/tictactoe/tictactoe.py +++ b/pettingzoo/classic/tictactoe/tictactoe.py @@ -80,7 +80,7 @@ from pettingzoo import AECEnv from pettingzoo.classic.tictactoe.board import Board -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers def get_image(path): @@ -143,7 +143,7 @@ def __init__( self.truncations = {i: False for i in self.agents} self.infos = {i: {"legal_moves": list(range(0, 9))} for i in self.agents} - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() self.render_mode = render_mode diff --git a/pettingzoo/mpe/_mpe_utils/simple_env.py b/pettingzoo/mpe/_mpe_utils/simple_env.py index 6d420fe76..af95b64d4 100644 --- a/pettingzoo/mpe/_mpe_utils/simple_env.py +++ b/pettingzoo/mpe/_mpe_utils/simple_env.py @@ -9,7 +9,7 @@ from pettingzoo import AECEnv from pettingzoo.mpe._mpe_utils.core import Agent from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -75,7 +75,7 @@ def __init__( agent.name: idx for idx, agent in enumerate(self.world.agents) } - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # set spaces self.action_spaces = dict() diff --git 
a/pettingzoo/sisl/multiwalker/multiwalker.py b/pettingzoo/sisl/multiwalker/multiwalker.py index 8edf250d1..30adb9fe0 100644 --- a/pettingzoo/sisl/multiwalker/multiwalker.py +++ b/pettingzoo/sisl/multiwalker/multiwalker.py @@ -125,7 +125,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.multiwalker.multiwalker_base import FPS from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -156,7 +156,7 @@ def __init__(self, *args, **kwargs): self.agent_name_mapping = dict( zip(self.agents, list(range(self.env.n_walkers))) ) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) self.observation_spaces = dict(zip(self.agents, self.env.observation_space)) diff --git a/pettingzoo/sisl/pursuit/pursuit.py b/pettingzoo/sisl/pursuit/pursuit.py index c75728d31..c68f189bb 100644 --- a/pettingzoo/sisl/pursuit/pursuit.py +++ b/pettingzoo/sisl/pursuit/pursuit.py @@ -85,7 +85,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.pursuit.manual_policy import ManualPolicy from pettingzoo.sisl.pursuit.pursuit_base import Pursuit as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn __all__ = ["ManualPolicy", "env", "parallel_env", "raw_env"] @@ -118,7 +118,7 @@ def __init__(self, *args, **kwargs): self.agents = ["pursuer_" + str(a) for a in range(self.env.num_agents)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, list(range(self.num_agents)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.n_act_agents = self.env.act_dims[0] self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/sisl/waterworld/waterworld.py b/pettingzoo/sisl/waterworld/waterworld.py index d2de2eb21..7684d7206 100644 --- a/pettingzoo/sisl/waterworld/waterworld.py +++ b/pettingzoo/sisl/waterworld/waterworld.py @@ -141,7 +141,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.waterworld.waterworld_base import FPS from pettingzoo.sisl.waterworld.waterworld_base import WaterworldBase as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -171,7 +171,7 @@ def __init__(self, *args, **kwargs): self.agents = ["pursuer_" + str(r) for r in range(self.env.num_agents)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, list(range(self.num_agents)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py b/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py index 2985a07c6..1c48d6083 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import 
agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -105,7 +105,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py b/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py index b7cbf2b30..726afa6a9 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -107,7 +107,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py b/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py index 7f307d5e8..5b966b174 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -99,7 +99,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_v0.py b/pettingzoo/test/example_envs/generated_agents_env_v0.py index 28f11469b..827465382 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -99,7 +99,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/utils/__init__.py b/pettingzoo/utils/__init__.py index af9445539..1d16fe76b 100644 --- a/pettingzoo/utils/__init__.py +++ b/pettingzoo/utils/__init__.py @@ -1,4 +1,4 @@ -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector from pettingzoo.utils.average_total_reward import average_total_reward from pettingzoo.utils.conversions import ( aec_to_parallel, diff --git a/pettingzoo/utils/agent_selector.py b/pettingzoo/utils/agent_selector.py index 0b6222990..2643b1c9a 
100644 --- a/pettingzoo/utils/agent_selector.py +++ b/pettingzoo/utils/agent_selector.py @@ -1,16 +1,17 @@ from __future__ import annotations from typing import Any +from warnings import warn -class agent_selector: +class AgentSelector: """Outputs an agent in the given order whenever agent_select is called. Can reinitialize to a new order. Example: - >>> from pettingzoo.utils import agent_selector - >>> agent_selector = agent_selector(agent_order=["player1", "player2"]) + >>> from pettingzoo.utils import AgentSelector + >>> agent_selector = AgentSelector(agent_order=["player1", "player2"]) >>> agent_selector.reset() 'player1' >>> agent_selector.next() @@ -52,8 +53,8 @@ def is_first(self) -> bool: """Check if the current agent is the first agent in the cycle.""" return self.selected_agent == self.agent_order[0] - def __eq__(self, other: agent_selector) -> bool: - if not isinstance(other, agent_selector): + def __eq__(self, other: AgentSelector) -> bool: + if not isinstance(other, AgentSelector): return NotImplemented return ( @@ -61,3 +62,14 @@ def __eq__(self, other: agent_selector) -> bool: and self._current_agent == other._current_agent and self.selected_agent == other.selected_agent ) + + +class agent_selector(AgentSelector): + """Deprecated version of AgentSelector. Use that instead.""" + + def __init__(self, *args, **kwargs): + warn( + "agent_selector is deprecated, please use AgentSelector", + DeprecationWarning, + ) + super().__init__(*args, **kwargs) diff --git a/pettingzoo/utils/conversions.py b/pettingzoo/utils/conversions.py index 601a1fb06..7cf99f6d9 100644 --- a/pettingzoo/utils/conversions.py +++ b/pettingzoo/utils/conversions.py @@ -4,7 +4,7 @@ from collections import defaultdict from typing import Callable, Dict, Optional -from pettingzoo.utils import agent_selector +from pettingzoo.utils import AgentSelector from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType, ParallelEnv from pettingzoo.utils.wrappers import OrderEnforcingWrapper @@ -309,7 +309,7 @@ def reset(self, seed=None, options=None): self._actions: Dict[AgentID, Optional[ActionType]] = { agent: None for agent in self.agents } - self._agent_selector = agent_selector(self._live_agents) + self._agent_selector = AgentSelector(self._live_agents) self.agent_selection = self._agent_selector.reset() self.terminations = {agent: False for agent in self.agents} self.truncations = {agent: False for agent in self.agents} @@ -377,7 +377,7 @@ def step(self, action: Optional[ActionType]): ] if len(self.env.agents): - self._agent_selector = agent_selector(self.env.agents) + self._agent_selector = AgentSelector(self.env.agents) self.agent_selection = self._agent_selector.reset() self._deads_step_first() From 6f9df27f924e8aa2906837e59b6afacdbab7d2f7 Mon Sep 17 00:00:00 2001 From: Joras Oliveira <43121361+JorasOliveira@users.noreply.github.com> Date: Sat, 27 Apr 2024 10:59:33 -0300 Subject: [PATCH 15/21] Update third_party_envs.md (#1201) --- docs/environments/third_party_envs.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/environments/third_party_envs.md b/docs/environments/third_party_envs.md index aeca31fb9..928904be9 100644 --- a/docs/environments/third_party_envs.md +++ b/docs/environments/third_party_envs.md @@ -57,6 +57,12 @@ CookingZoo: a gym-cooking derivative to simulate a complex cooking environment. A library for doing reinforcement learning using [Crazyflie](https://www.bitcraze.io/products/crazyflie-2-1/) drones. 
+### [DSSE: Drone Swarm Search Environment](https://github.com/pfeinsper/drone-swarm-search) +[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.3-blue)]() +![GitHub stars](https://img.shields.io/github/stars/pfeinsper/drone-swarm-search) + +A single and multi-agent environment to train swarms of drones for maritime search. + ### [PettingZoo Dilemma Envs](https://github.com/tianyu-z/pettingzoo_dilemma_envs) From 38e252020d0b067b8c04f7fd9e47c2943691a184 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Fri, 3 May 2024 17:35:06 -0400 Subject: [PATCH 16/21] Fix bug in SB3 tutorial ActionMask (#1203) --- .../sb3_connect_four_action_mask.py | 18 ++++++++++++++++-- tutorials/SB3/test/test_sb3_action_mask.py | 9 ++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py index d8d890362..29d623251 100644 --- a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py +++ b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py @@ -37,9 +37,23 @@ def reset(self, seed=None, options=None): return self.observe(self.agent_selection), {} def step(self, action): - """Gymnasium-like step function, returning observation, reward, termination, truncation, info.""" + """Gymnasium-like step function, returning observation, reward, termination, truncation, info. + + The observation is for the next agent (used to determine the next action), while the remaining + items are for the agent that just acted (used to understand what just happened). + """ + current_agent = self.agent_selection + super().step(action) - return super().last() + + next_agent = self.agent_selection + return ( + self.observe(next_agent), + self._cumulative_rewards[current_agent], + self.terminations[current_agent], + self.truncations[current_agent], + self.infos[current_agent], + ) def observe(self, agent): """Return only raw observation, removing action mask.""" diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py index 3835af393..de4ee3c07 100644 --- a/tutorials/SB3/test/test_sb3_action_mask.py +++ b/tutorials/SB3/test/test_sb3_action_mask.py @@ -23,14 +23,14 @@ EASY_ENVS = [ gin_rummy_v4, texas_holdem_no_limit_v6, # texas holdem human rendered game ends instantly, but with random actions it works fine - texas_holdem_v4, + tictactoe_v3, + leduc_holdem_v4, ] # More difficult environments which will likely take more training time MEDIUM_ENVS = [ - leduc_holdem_v4, # with 10x as many steps it gets higher total rewards (9 vs -9), 0.52 winrate, and 0.92 vs 0.83 total scores hanabi_v5, # even with 10x as many steps, total score seems to always be tied between the two agents - tictactoe_v3, # even with 10x as many steps, agent still loses every time (most likely an error somewhere) + texas_holdem_v4, # this performs poorly with updates to SB3 wrapper chess_v6, # difficult to train because games take so long, performance varies heavily ] @@ -50,8 +50,7 @@ def test_action_mask_easy(env_fn): env_kwargs = {} - # Leduc Hold`em takes slightly longer to outperform random - steps = 8192 if env_fn != leduc_holdem_v4 else 8192 * 4 + steps = 8192 * 4 # Train a model against itself (takes ~2 minutes on GPU) train_action_mask(env_fn, steps=steps, seed=0, **env_kwargs) From 98e8c206daca7b45e1b61785d245628872c149b6 Mon Sep 17 00:00:00 2001 From: David Ackerman 
<145808634+dm-ackerman@users.noreply.github.com> Date: Fri, 3 May 2024 17:40:37 -0400 Subject: [PATCH 17/21] Update the TicTacToe environment (#1192) Co-authored-by: Elliot Tower --- pettingzoo/classic/tictactoe/board.py | 155 ++++++++++++--------- pettingzoo/classic/tictactoe/test_board.py | 127 +++++++++++++++++ pettingzoo/classic/tictactoe/tictactoe.py | 86 +++++------- tutorials/SB3/test/test_sb3_action_mask.py | 2 +- 4 files changed, 251 insertions(+), 119 deletions(-) create mode 100644 pettingzoo/classic/tictactoe/test_board.py diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py index 35186a57a..e6fee6853 100644 --- a/pettingzoo/classic/tictactoe/board.py +++ b/pettingzoo/classic/tictactoe/board.py @@ -1,79 +1,102 @@ +TTT_PLAYER1_WIN = 0 +TTT_PLAYER2_WIN = 1 +TTT_TIE = -1 +TTT_GAME_NOT_OVER = -2 + + class Board: + """Board for a TicTacToe Game. + + This tracks the position and identity of marks on the game board + and allows checking for a winner. + + Example of usage: + + import random + board = Board() + + # random legal moves - for example purposes + def choose_move(board_obj: Board) -> int: + legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0] + return random.choice(legal_moves) + + player = 0 + while True: + move = choose_move(board) + board.play_turn(player, move) + status = board.game_status() + if status != TTT_GAME_NOT_OVER: + if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]: + print(f"player {status} won") + else: # status == TTT_TIE + print("Tie Game") + break + player = player ^ 1 # swaps between players 0 and 1 + """ + + # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2) + winning_combinations = [ + (0, 1, 2), + (3, 4, 5), + (6, 7, 8), + (0, 3, 6), + (1, 4, 7), + (2, 5, 8), + (0, 4, 8), + (2, 4, 6), + ] + def __init__(self): - # internally self.board.squares holds a flat representation of tic tac toe board - # where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0] - # where indexes are column wise order + # self.squares holds a flat representation of the tic tac toe board. + # an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]. + # player 1's squares are marked 1, while player 2's are marked 2. 
+ # mapping of the flat indices to the 3x3 grid is as follows: # 0 3 6 # 1 4 7 # 2 5 8 - - # empty -- 0 - # player 0 -- 1 - # player 1 -- 2 self.squares = [0] * 9 - # precommute possible winning combinations - self.calculate_winners() + @property + def _n_empty_squares(self): + """The current number of empty squares on the board.""" + return self.squares.count(0) - def setup(self): - self.calculate_winners() + def reset(self): + """Remove all marks from the board.""" + self.squares = [0] * 9 def play_turn(self, agent, pos): - # if spot is empty - if self.squares[pos] != 0: - return - if agent == 0: - self.squares[pos] = 1 - elif agent == 1: - self.squares[pos] = 2 - return - - def calculate_winners(self): - winning_combinations = [] - indices = [x for x in range(0, 9)] - - # Vertical combinations - winning_combinations += [ - tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3) - ] - - # Horizontal combinations - winning_combinations += [ - tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3) - ] - - # Diagonal combinations - winning_combinations.append(tuple(x for x in range(0, len(indices), 4))) - winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2))) - - self.winning_combinations = winning_combinations - - # returns: - # -1 for no winner - # 1 -- agent 0 wins - # 2 -- agent 1 wins - def check_for_winner(self): - winner = -1 - for combination in self.winning_combinations: - states = [] - for index in combination: - states.append(self.squares[index]) - if all(x == 1 for x in states): - winner = 1 - if all(x == 2 for x in states): - winner = 2 - return winner - - def check_game_over(self): - winner = self.check_for_winner() - - if winner == -1 and all(square in [1, 2] for square in self.squares): - # tie - return True - elif winner in [1, 2]: - return True - else: - return False + """Place a mark by the agent in the spot given. + + The following are required for a move to be valid: + * The agent must be a known agent ID (either 0 or 1). + * The spot must be be empty. + * The spot must be in the board (integer: 0 <= spot <= 8) + + If any of those are not true, an assertion will fail. + """ + assert pos >= 0 and pos <= 8, "Invalid move location" + assert agent in [0, 1], "Invalid agent" + assert self.squares[pos] == 0, "Location is not empty" + + # agent is [0, 1]. board values are stored as [1, 2]. 
+ self.squares[pos] = agent + 1 + + def game_status(self): + """Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER).""" + for indices in self.winning_combinations: + states = [self.squares[idx] for idx in indices] + if states == [1, 1, 1]: + return TTT_PLAYER1_WIN + if states == [2, 2, 2]: + return TTT_PLAYER2_WIN + if self._n_empty_squares == 0: + return TTT_TIE + return TTT_GAME_NOT_OVER def __str__(self): return str(self.squares) + + def legal_moves(self): + """Return list of legal moves (as flat indices for spaces on the board).""" + return [i for i, mark in enumerate(self.squares) if mark == 0] diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py new file mode 100644 index 000000000..b8f7e9248 --- /dev/null +++ b/pettingzoo/classic/tictactoe/test_board.py @@ -0,0 +1,127 @@ +"""Test cases for TicTacToe board.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from pettingzoo.classic.tictactoe.board import ( # type: ignore + TTT_GAME_NOT_OVER, + TTT_PLAYER1_WIN, + TTT_PLAYER2_WIN, + TTT_TIE, + Board, +) + +# Note: mapping of moves to board positions are: +# 0 3 6 +# 1 4 7 +# 2 5 8 + +agent2_win = { + "moves": [ + # agent_id, position, board after move + (0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]), + (1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]), + (0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]), + (1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]), + (0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]), + (1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]), + (0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]), + (1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]), # agent 2 wins here + (0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]), + ], + "max_step": 7, # should not get past here + "winner": TTT_PLAYER2_WIN, +} + +tie = { + "moves": [ # should be tie + (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]), + (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]), + (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]), + (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]), + (0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]), + (1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]), + (0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]), + (1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]), + (0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]), + ], + "max_step": 8, + "winner": TTT_TIE, +} + +agent1_win = { + "moves": [ + (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]), + (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]), + (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]), + (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]), + (0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]), # agent 1 should win here + (1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]), + (0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]), + (1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]), + (0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]), + ], + "max_step": 4, + "winner": TTT_PLAYER1_WIN, +} + + +@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie]) +def test_tictactoe_board_games(values: dict[str, Any]) -> None: + """Test that TicTacToe games go as expected.""" + expected_winner = values["winner"] + max_step = values["max_step"] + + board = Board() + for i, (agent, pos, board_layout) in enumerate(values["moves"]): + assert i <= max_step, "max step exceed in tictactoe game" + board.play_turn(agent, pos) + assert board_layout == board.squares, "wrong tictactoe layout after move" + status = board.game_status() + if status != TTT_GAME_NOT_OVER: + assert i == max_step, "tictactoe game ended on wrong step" + assert status == expected_winner, "wrong winner in tictactoe board test" + break + + +def test_tictactoe_winning_boards() -> None: + """Test that winning board configurations actually win.""" + # these are the winning lines for player 1. 
Note that moves + # for player 2 are included to make it a legal board. + winning_lines = [ # vertical(x3), horizontal(x3), diagonal(x2) + [1, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 1], + [1, 0, 0, 1, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 1, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 1, 0, 0, 0, 1], + [0, 0, 1, 0, 1, 0, 1, 0, 0], + ] + for line in winning_lines: + board = Board() + board.squares = line + assert board.game_status() == TTT_PLAYER1_WIN, "Bad win check in TicTacToe" + + +def test_tictactoe_bad_move() -> None: + """Test that illegal TicTacToe moves are rejected.""" + board = Board() + # 1) move out of bounds should be rejected + for outside_space in [-1, 9]: + with pytest.raises(AssertionError, match="Invalid move location"): + board.play_turn(0, outside_space) + + # 2) move by unknown agent should be rejected + for unknown_agent in [-1, 2]: + with pytest.raises(AssertionError, match="Invalid agent"): + board.play_turn(unknown_agent, 0) + + # 3) move in occupied space by either agent should be rejected + board.play_turn(0, 4) # this is fine + for agent in [0, 1]: + with pytest.raises(AssertionError, match="Location is not empty"): + board.play_turn(agent, 4) # repeating move is not valid diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py index e68f900a8..e3c219c5a 100644 --- a/pettingzoo/classic/tictactoe/tictactoe.py +++ b/pettingzoo/classic/tictactoe/tictactoe.py @@ -79,11 +79,12 @@ from gymnasium.utils import EzPickle from pettingzoo import AECEnv -from pettingzoo.classic.tictactoe.board import Board +from pettingzoo.classic.tictactoe.board import TTT_GAME_NOT_OVER, TTT_TIE, Board from pettingzoo.utils import AgentSelector, wrappers def get_image(path): + """Return a pygame image loaded from the given path.""" from os import path as os_path cwd = os_path.dirname(__file__) @@ -92,6 +93,7 @@ def get_image(path): def get_font(path, size): + """Return a pygame font loaded from the given path.""" from os import path as os_path cwd = os_path.dirname(__file__) @@ -141,7 +143,7 @@ def __init__( self.rewards = {i: 0 for i in self.agents} self.terminations = {i: False for i in self.agents} self.truncations = {i: False for i in self.agents} - self.infos = {i: {"legal_moves": list(range(0, 9))} for i in self.agents} + self.infos = {i: {} for i in self.agents} self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() @@ -153,42 +155,38 @@ def __init__( if self.render_mode == "human": self.clock = pygame.time.Clock() - # Key - # ---- - # blank space = 0 - # agent 0 = 1 - # agent 1 = 2 - # An observation is list of lists, where each list represents a row - # - # [[0,0,2] - # [1,2,1] - # [2,1,0]] def observe(self, agent): board_vals = np.array(self.board.squares).reshape(3, 3) cur_player = self.possible_agents.index(agent) opp_player = (cur_player + 1) % 2 - cur_p_board = np.equal(board_vals, cur_player + 1) - opp_p_board = np.equal(board_vals, opp_player + 1) - - observation = np.stack([cur_p_board, opp_p_board], axis=2).astype(np.int8) - legal_moves = self._legal_moves() if agent == self.agent_selection else [] + observation = np.empty((3, 3, 2), dtype=np.int8) + # this will give a copy of the board that is 1 for player 1's + # marks and zero for every other square, whether empty or not. 
+ observation[:, :, 0] = np.equal(board_vals, cur_player + 1) + observation[:, :, 1] = np.equal(board_vals, opp_player + 1) - action_mask = np.zeros(9, "int8") - for i in legal_moves: - action_mask[i] = 1 + action_mask = self._get_mask(agent) return {"observation": observation, "action_mask": action_mask} + def _get_mask(self, agent): + action_mask = np.zeros(9, dtype=np.int8) + + # Per the documentation, the mask of any agent other than the + # currently selected one is all zeros. + if agent == self.agent_selection: + for i in self.board.legal_moves(): + action_mask[i] = 1 + + return action_mask + def observation_space(self, agent): return self.observation_spaces[agent] def action_space(self, agent): return self.action_spaces[agent] - def _legal_moves(self): - return [i for i in range(len(self.board.squares)) if self.board.squares[i] == 0] - # action in this case is a value from 0 to 8 indicating position to move on tictactoe board def step(self, action): if ( @@ -196,45 +194,30 @@ def step(self, action): or self.truncations[self.agent_selection] ): return self._was_dead_step(action) - # check if input action is a valid move (0 == empty spot) - assert self.board.squares[action] == 0, "played illegal move" - # play turn - self.board.play_turn(self.agents.index(self.agent_selection), action) - - # update infos - # list of valid actions (indexes in board) - # next_agent = self.agents[(self.agents.index(self.agent_selection) + 1) % len(self.agents)] - next_agent = self._agent_selector.next() - if self.board.check_game_over(): - winner = self.board.check_for_winner() + self.board.play_turn(self.agents.index(self.agent_selection), action) - if winner == -1: - # tie + status = self.board.game_status() + if status != TTT_GAME_NOT_OVER: + if status == TTT_TIE: pass - elif winner == 1: - # agent 0 won - self.rewards[self.agents[0]] += 1 - self.rewards[self.agents[1]] -= 1 else: - # agent 1 won - self.rewards[self.agents[1]] += 1 - self.rewards[self.agents[0]] -= 1 + winner = status # either TTT_PLAYER1_WIN or TTT_PLAYER2_WIN + loser = winner ^ 1 # 0 -> 1; 1 -> 0 + self.rewards[self.agents[winner]] += 1 + self.rewards[self.agents[loser]] -= 1 # once either play wins or there is a draw, game over, both players are done self.terminations = {i: True for i in self.agents} + self._accumulate_rewards() - # Switch selection to next agents - self._cumulative_rewards[self.agent_selection] = 0 - self.agent_selection = next_agent + self.agent_selection = self._agent_selector.next() - self._accumulate_rewards() if self.render_mode == "human": self.render() def reset(self, seed=None, options=None): - # reset environment - self.board = Board() + self.board.reset() self.agents = self.possible_agents[:] self.rewards = {i: 0 for i in self.agents} @@ -244,10 +227,9 @@ def reset(self, seed=None, options=None): self.infos = {i: {} for i in self.agents} # selects the first agent self._agent_selector.reinit(self.agents) - self._agent_selector.reset() self.agent_selection = self._agent_selector.reset() - if self.screen is None: + if self.render_mode is not None and self.screen is None: pygame.init() if self.render_mode == "human": @@ -255,7 +237,7 @@ def reset(self, seed=None, options=None): (self.screen_height, self.screen_height) ) pygame.display.set_caption("Tic-Tac-Toe") - else: + elif self.render_mode == "rgb_array": self.screen = pygame.Surface((self.screen_height, self.screen_height)) def close(self): diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py index 
de4ee3c07..2be85b1d8 100644 --- a/tutorials/SB3/test/test_sb3_action_mask.py +++ b/tutorials/SB3/test/test_sb3_action_mask.py @@ -91,7 +91,7 @@ def test_action_mask_medium(env_fn): assert ( winrate < 0.75 - ), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi, 0% for tic-tac-toe + ), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi # Watch two games (disabled by default) # eval_action_mask(env_fn, num_games=2, render_mode="human", **env_kwargs) From 46d92e024c1382a2568fce2dd517c186bd6bb775 Mon Sep 17 00:00:00 2001 From: "HP (Hetav)" <60848863+pandyah5@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:26:07 -0700 Subject: [PATCH 18/21] Updated documentation of observation format in Simple World Comm (#1212) --- pettingzoo/mpe/simple_world_comm/simple_world_comm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pettingzoo/mpe/simple_world_comm/simple_world_comm.py b/pettingzoo/mpe/simple_world_comm/simple_world_comm.py index 598c0d23e..2601dd76e 100644 --- a/pettingzoo/mpe/simple_world_comm/simple_world_comm.py +++ b/pettingzoo/mpe/simple_world_comm/simple_world_comm.py @@ -30,11 +30,11 @@ In particular, the good agents reward, is -5 for every collision with an adversary, -2 x bound by the `bound` function described in simple_tag, +2 for every collision with a food, and -0.05 x minimum distance to any food. The adversarial agents are rewarded +5 for collisions and -0.1 x minimum distance to a good agent. s -Good agent observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest]` +Good agent observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, self_in_forest, other_agent_velocities]` Normal adversary observations:`[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest, leader_comm]` -Adversary leader observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, leader_comm]` +Adversary leader observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest, leader_comm]` *Note that when the forests prevent an agent from being seen, the observation of that agents relative position is set to (0,0).* From 4ecc0e1ce41994d19e89e72806b37f07b79f2356 Mon Sep 17 00:00:00 2001 From: ajmeek <61296971+ajmeek@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:27:51 -0400 Subject: [PATCH 19/21] Changed dead link to correct link in Tianshou tutorial (#1214) --- docs/tutorials/tianshou/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/tianshou/index.md b/docs/tutorials/tianshou/index.md index eef3a7d0c..1a879f12c 100644 --- a/docs/tutorials/tianshou/index.md +++ b/docs/tutorials/tianshou/index.md @@ -21,7 +21,7 @@ It boasts a large number of algorithms and high quality software engineering sta ## Examples using PettingZoo -* [Multi-Agent RL](https://tianshou.readthedocs.io/en/master/tutorials/tictactoe.html) +* [Multi-Agent RL](https://tianshou.org/en/master/01_tutorials/04_tictactoe.html) ## Architecture From 9f441feb56772e1a08c5998318d66b9989ea75b0 Mon Sep 17 00:00:00 2001 From: Jannik Hinrichs <58370727+Zoraiyo@users.noreply.github.com> Date: Fri, 21 Jun 2024 03:32:03 +0200 Subject: [PATCH 20/21] Fix: Swapped colors in waterworld description (#1210) --- pettingzoo/sisl/waterworld/waterworld.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pettingzoo/sisl/waterworld/waterworld.py b/pettingzoo/sisl/waterworld/waterworld.py index 7684d7206..df1e31549 100644 --- a/pettingzoo/sisl/waterworld/waterworld.py +++ b/pettingzoo/sisl/waterworld/waterworld.py @@ -33,7 +33,7 @@ poison respectively. The number of features per sensor is 8 by default with `speed_features` enabled, or 5 if `speed_features` is turned off. Therefore with `speed_features` enabled, the observation shape takes the full form of `(8 × n_sensors) + 2`. Elements of the observation vector take on values in the range [-1, 1]. -For example, by default there are 5 agents (purple), 5 food targets (red) and 10 poison targets (green). Each agent has 30 range-limited sensors, depicted by the black lines, to detect neighboring entities (food and poison targets) resulting in 242 element vector of computed values about the +For example, by default there are 5 agents (purple), 5 food targets (green) and 10 poison targets (red). Each agent has 30 range-limited sensors, depicted by the black lines, to detect neighboring entities (food and poison targets) resulting in 242 element vector of computed values about the environment for the observation space. These values represent the distances and speeds sensed by each sensor on the archea. Sensors that do not sense any objects within their range report 0 for speed and 1 for distance. This has been fixed from the reference environments to keep items floating off screen and being lost forever. From 1eef080e59d4e81503d2138d42116edcea5224c4 Mon Sep 17 00:00:00 2001 From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:40:43 -0400 Subject: [PATCH 21/21] TerminateIllegalWrapper fix (#1206) --- .../utils/wrappers/terminate_illegal.py | 11 ++- test/wrapper_test.py | 71 ++++++++++++++++++- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/pettingzoo/utils/wrappers/terminate_illegal.py b/pettingzoo/utils/wrappers/terminate_illegal.py index a49d9a0be..79f95504a 100644 --- a/pettingzoo/utils/wrappers/terminate_illegal.py +++ b/pettingzoo/utils/wrappers/terminate_illegal.py @@ -1,4 +1,3 @@ -# pyright reportGeneralTypeIssues=false from __future__ import annotations from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType @@ -20,6 +19,7 @@ def __init__( self._illegal_value = illegal_reward self._prev_obs = None self._prev_info = None + self._terminated = False # terminated by an illegal move def reset(self, seed: int | None = None, options: dict | None = None) -> None: self._terminated = False @@ -42,7 +42,6 @@ def step(self, action: ActionType) -> None: if self._prev_obs is None: self.observe(self.agent_selection) if isinstance(self._prev_obs, dict): - assert self._prev_obs is not None assert ( "action_mask" in self._prev_obs ), f"`action_mask` not found in dictionary observation: {self._prev_obs}. Action mask must either be in `observation['action_mask']` or `info['action_mask']` to use TerminateIllegalWrapper." 
@@ -60,7 +59,7 @@ def step(self, action: ActionType) -> None: self.terminations[self.agent_selection] or self.truncations[self.agent_selection] ): - self._was_dead_step(action) # pyright: ignore[reportGeneralTypeIssues] + self.env.unwrapped._was_dead_step(action) elif ( not self.terminations[self.agent_selection] and not self.truncations[self.agent_selection] @@ -70,12 +69,10 @@ def step(self, action: ActionType) -> None: self.env.unwrapped._cumulative_rewards[self.agent_selection] = 0 self.env.unwrapped.terminations = {d: True for d in self.agents} self.env.unwrapped.truncations = {d: True for d in self.agents} - self._prev_obs = None - self._prev_info = None self.env.unwrapped.rewards = {d: 0 for d in self.truncations} self.env.unwrapped.rewards[current_agent] = float(self._illegal_value) - self._accumulate_rewards() - self._deads_step_first() + self.env.unwrapped._accumulate_rewards() + self.env.unwrapped._deads_step_first() self._terminated = True else: super().step(action) diff --git a/test/wrapper_test.py b/test/wrapper_test.py index 650fe328b..a03bd81b3 100644 --- a/test/wrapper_test.py +++ b/test/wrapper_test.py @@ -3,8 +3,13 @@ import pytest from pettingzoo.butterfly import pistonball_v6 -from pettingzoo.classic import texas_holdem_no_limit_v6 -from pettingzoo.utils.wrappers import MultiEpisodeEnv, MultiEpisodeParallelEnv +from pettingzoo.classic import texas_holdem_no_limit_v6, tictactoe_v3 +from pettingzoo.utils.wrappers import ( + BaseWrapper, + MultiEpisodeEnv, + MultiEpisodeParallelEnv, + TerminateIllegalWrapper, +) @pytest.mark.parametrize(("num_episodes"), [1, 2, 3, 4, 5, 6]) @@ -67,3 +72,65 @@ def test_multi_episode_parallel_env_wrapper(num_episodes) -> None: assert ( steps == num_episodes * 125 ), f"Expected to have 125 steps per episode, got {steps / num_episodes}." + + +def _do_game(env: TerminateIllegalWrapper, seed: int) -> None: + """Run a single game with reproducible random moves.""" + assert isinstance( + env, TerminateIllegalWrapper + ), "test_terminate_illegal must use TerminateIllegalWrapper" + env.reset(seed) + for agent in env.agents: + # make the random moves reproducible + env.action_space(agent).seed(seed) + + for agent in env.agent_iter(): + _, _, termination, truncation, _ = env.last() + + if termination or truncation: + env.step(None) + else: + action = env.action_space(agent).sample() + env.step(action) + + +def test_terminate_illegal() -> None: + """Test for a problem with terminate illegal wrapper. + + The problem is that env variables, including agent_selection, are set by + calls from TerminateIllegalWrapper to env functions. However, they are + called by the wrapper object, not the env so they are set in the wrapper + object rather than the base env object. When the code later tries to run, + the values get updated in the env code, but the wrapper pulls it's own + values that shadow them. + + The test here confirms that is fixed. + """ + # not using env() because we need to ensure that the env is + # wrapped by TerminateIllegalWrapper + raw_env = tictactoe_v3.raw_env() + env = TerminateIllegalWrapper(raw_env, illegal_reward=-1) + + _do_game(env, 42) + # bug is triggered by a corrupted state after a game is terminated + # due to an illegal move. So we need to run the game twice to + # see the effect. 
+ _do_game(env, 42) + + # get a list of what all the agent_selection values in the wrapper stack + unwrapped = env + agent_selections = [] + while unwrapped != env.unwrapped: + # the actual value for this wrapper (or None if no value) + agent_selections.append(unwrapped.__dict__.get("agent_selection", None)) + assert isinstance(unwrapped, BaseWrapper) + unwrapped = unwrapped.env + + # last one from the actual env + agent_selections.append(unwrapped.__dict__.get("agent_selection", None)) + + # remove None from agent_selections + agent_selections = [x for x in agent_selections if x is not None] + + # all values must be the same, or else the wrapper and env are mismatched + assert len(set(agent_selections)) == 1, "agent_selection mismatch"
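
Note on the final patch: the shadowing problem described in the `test_terminate_illegal` docstring can be reproduced outside PettingZoo in a few lines. The sketch below is not PettingZoo code; the class names (`Env`, `ForwardingWrapper`, `BuggyWrapper`) are made up purely to illustrate the mechanism under the assumption that the wrapper forwards attribute reads via `__getattr__`, as `BaseWrapper` does.

``` python
class Env:
    """Stand-in for a base environment with mutable per-step state."""

    def __init__(self):
        self.agent_selection = "player_0"


class ForwardingWrapper:
    """Stand-in for a wrapper that forwards attribute reads to the env."""

    def __init__(self, env):
        self.env = env

    def __getattr__(self, name):
        # Only consulted when the attribute is *not* found on the wrapper
        # itself, so a stale copy stored on the wrapper hides the env's value.
        return getattr(self.env, name)


class BuggyWrapper(ForwardingWrapper):
    def skip_agent(self):
        # Assigning through `self` stores the value on the wrapper instance,
        # not on the wrapped env.
        self.agent_selection = "player_1"


env = Env()
wrapped = BuggyWrapper(env)

print(wrapped.agent_selection)   # "player_0" -- read forwarded from the env
wrapped.skip_agent()             # stores "player_1" on the wrapper only
print(env.agent_selection)       # "player_0" -- the env itself never changed
print(wrapped.agent_selection)   # "player_1" -- the wrapper's stale copy now shadows the env
```

This is why the wrapper fix above performs mutations through `self.env.unwrapped` (for example `self.env.unwrapped._was_dead_step(action)` and `self.env.unwrapped._deads_step_first()`): state changes land on the base environment, the wrapper keeps forwarding reads to it, and the `agent_selections` check added in `wrapper_test.py` asserts that every layer of the wrapper stack agrees on the same value.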