Merge branch 'master' into jet/remove_lru_cache
jjshoots authored Jun 21, 2024
2 parents cfb783e + 1eef080 commit 451a303
Showing 51 changed files with 598 additions and 272 deletions.
27 changes: 26 additions & 1 deletion .github/workflows/linux-tutorials-test.yml
@@ -17,7 +17,32 @@ jobs:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test, AgileRL] # TODO: fix tutorials and add back Ray
tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test] # TODO: fix tutorials and add back Ray
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies and run tutorials
run: |
sudo apt-get install python3-opengl xvfb parallel
export PATH=/path/to/parallel:$PATH
export root_dir=$(pwd)
cd tutorials/${{ matrix.tutorial }}
pip install -r requirements.txt
pip uninstall -y pettingzoo
pip install -e $root_dir[testing]
AutoROM -v
for f in *.py; do xvfb-run -a -s "-screen 0 1024x768x24" python "$f"; done
agilerl-tutorial-test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.9', '3.10', '3.11']
tutorial: [AgileRL]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
16 changes: 12 additions & 4 deletions .github/workflows/macos-test.yml
@@ -3,25 +3,33 @@ name: MacOS tests

on:
push:
branches: [master]
branches: [none]

permissions:
contents: read

jobs:
macos-test:
runs-on: macos-11
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Big Sur, Monterey
os: [macos-11, macos-12]
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: 3.11
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install -e .[all]
pip install -e .[testing]
AutoROM -v
- name: Set dummy SDL video driver
run: |
export SDL_VIDEODRIVER=dummy
- name: Full Python tests
run: |
pytest -v --cov=pettingzoo --cov-report term
2 changes: 2 additions & 0 deletions README.md
@@ -28,6 +28,8 @@ To install the dependencies for one family, use `pip install 'pettingzoo[atari]'

We support Python 3.8, 3.9, 3.10 and 3.11 on Linux and macOS. We will accept PRs related to Windows, but do not officially support it.

Note: Some Linux distributions may require manual installation of `cmake`, `swig`, or `zlib1g-dev` (e.g., `sudo apt install cmake swig zlib1g-dev`)

## Getting started

For an introduction to PettingZoo, see [Basic Usage](https://pettingzoo.farama.org/content/basic_usage/). To create a new environment, see our [Environment Creation Tutorial](https://pettingzoo.farama.org/tutorials/custom_environment/1-project-structure/) and [Custom Environment Examples](https://pettingzoo.farama.org/content/environment_creation/).
2 changes: 1 addition & 1 deletion docs/api/utils.md
@@ -165,7 +165,7 @@ Base class which is used by [CaptureStdoutWrapper](https://pettingzoo.farama.org

The agent selector utility allows for easy cycling of agents in an AEC environment. At any time it can be reset or reinitialized with a new order, allowing for changes in turn order or handling a dynamic number of agents (see [Knights-Archers-Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) for an example of spawning/killing agents)

Note: while many PettingZoo environments use agent_selector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)).
Note: while many PettingZoo environments use AgentSelector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)).

```{eval-rst}
.. currentmodule:: pettingzoo.utils
6 changes: 3 additions & 3 deletions docs/code_examples/aec_rps.py
@@ -3,7 +3,7 @@
from gymnasium.spaces import Discrete

from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector, wrappers
from pettingzoo.utils import AgentSelector, wrappers

ROCK = 0
PAPER = 1
@@ -145,9 +145,9 @@ def reset(self, seed=None, options=None):
self.observations = {agent: NONE for agent in self.agents}
self.num_moves = 0
"""
Our agent_selector utility allows easy cyclic stepping through the agents list.
Our AgentSelector utility allows easy cyclic stepping through the agents list.
"""
self._agent_selector = agent_selector(self.agents)
self._agent_selector = AgentSelector(self.agents)
self.agent_selection = self._agent_selector.next()

def step(self, action):
37 changes: 22 additions & 15 deletions docs/code_examples/parallel_rps.py
@@ -1,4 +1,5 @@
import gymnasium
import numpy as np
from gymnasium.spaces import Discrete

from pettingzoo import ParallelEnv
@@ -7,7 +8,7 @@
ROCK = 0
PAPER = 1
SCISSORS = 2
NONE = 3
NO_MOVE = 3
MOVES = ["ROCK", "PAPER", "SCISSORS", "None"]
NUM_ITERS = 100
REWARD_MAP = {
@@ -74,17 +75,20 @@ def __init__(self, render_mode=None):
zip(self.possible_agents, list(range(len(self.possible_agents))))
)

# we want to define the spaces as fixed objects so we can seed them
self._observation_spaces = {
agent: Discrete(4) for agent in self.possible_agents
}
self._action_spaces = {agent: Discrete(3) for agent in self.possible_agents}

# observation and action spaces are defined as functions which take in an agent id
# and returns the relevant spaces.
self.observation_space = lambda agent: self._observation_spaces[agent]
self.action_space = lambda agent: self._action_spaces[agent]
self.render_mode = render_mode
# Observation space should be defined here.
# lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
# gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
# Discrete(4) means an integer in range(0, 4)
return Discrete(4)

# Action space should be defined here.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
return Discrete(3)

def render(self):
"""
@@ -123,7 +127,8 @@ def reset(self, seed=None, options=None):
"""
self.agents = self.possible_agents[:]
self.num_moves = 0
observations = {agent: NONE for agent in self.agents}
# the observations should be numpy arrays even if there is only one value
observations = {agent: np.array(NO_MOVE) for agent in self.agents}
infos = {agent: {} for agent in self.agents}
self.state = observations

@@ -156,9 +161,11 @@ def step(self, actions):
env_truncation = self.num_moves >= NUM_ITERS
truncations = {agent: env_truncation for agent in self.agents}

# current observation is just the other player's most recent action
# Current observation is just the other player's most recent action
# This is converted to a numpy value of type int to match the type
# that we declared in observation_space()
observations = {
self.agents[i]: int(actions[self.agents[1 - i]])
self.agents[i]: np.array(actions[self.agents[1 - i]], dtype=np.int64)
for i in range(len(self.agents))
}
self.state = observations
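The `lru_cache` comment in the hunk above can be illustrated outside PettingZoo: memoizing a space-constructor method means repeated calls with the same agent return the identical object. This is a toy sketch with a stand-in class, not the real `ParallelEnv` API:

```python
import functools


class ToyEnv:
    """Stand-in for an environment class; illustrative only."""

    @functools.lru_cache(maxsize=None)
    def observation_space(self, agent):
        # Without caching, each call would build a fresh object.
        return ["Discrete(4) placeholder for", agent]


env = ToyEnv()
a = env.observation_space("player_0")
b = env.observation_space("player_0")
print(a is b)  # the memoized object is reused for the same agent
```

Note that `lru_cache` on a method keys on `self` too, so it pins the instance in the cache for the life of the process; as the diff's comment says, drop the decorator if spaces can change over time.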
3 changes: 2 additions & 1 deletion docs/content/basic_usage.md
@@ -148,5 +148,6 @@ In certain cases, separating agent from environment actions is helpful for study
Environments are by default wrapped in a handful of lightweight wrappers that handle error messages and ensure reasonable behavior given incorrect usage (i.e. playing illegal moves or stepping before resetting). However, these add a very small amount of overhead. If you want to create an environment without them, you can do so by using the `raw_env()` constructor contained within each module:

``` python
env = knights_archers_zombies_v10.raw_env(<environment parameters>)
environment_parameters = {} # any parameters to pass to the environment
env = knights_archers_zombies_v10.raw_env(**environment_parameters)
```
6 changes: 3 additions & 3 deletions docs/content/environment_creation.md
@@ -62,14 +62,14 @@ The utils directory also contain some classes which are only helpful for develop

### Agent selector

The `agent_selector` class steps through agents in a cycle
The `AgentSelector` class steps through agents in a cycle

It can be used as follows to cycle through the list of agents:

```python
from pettingzoo.utils import agent_selector
from pettingzoo.utils import AgentSelector
agents = ["agent_1", "agent_2", "agent_3"]
selector = agent_selector(agents)
selector = AgentSelector(agents)
agent_selection = selector.reset()
# agent_selection will be "agent_1"
for i in range(100):
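The loop in the snippet above is cut off by the diff view. A self-contained toy class with the same `reset()`/`next()` cycling behavior might look like the following — an illustrative stand-in, not the real `pettingzoo.utils.AgentSelector`:

```python
class CyclicSelector:
    """Toy stand-in mimicking AgentSelector-style reset()/next() cycling."""

    def __init__(self, agents):
        self.agents = list(agents)
        self._idx = 0

    def reset(self):
        # Restart the cycle and return the first agent.
        self._idx = 0
        return self.next()

    def next(self):
        # Return the current agent and advance, wrapping around the list.
        agent = self.agents[self._idx]
        self._idx = (self._idx + 1) % len(self.agents)
        return agent


selector = CyclicSelector(["agent_1", "agent_2", "agent_3"])
first = selector.reset()                    # "agent_1"
cycle = [selector.next() for _ in range(4)]  # wraps back to agent_1
print(first, cycle)
```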
8 changes: 6 additions & 2 deletions docs/content/environment_tests.md
@@ -80,10 +80,14 @@ render_test(env_func)
The render test method takes in an optional argument `custom_tests` that allows for additional tests in non-standard modes.

``` python
from pettingzoo.test import render_test
from pettingzoo.butterfly import pistonball_v6
env_func = pistonball_v6.env

custom_tests = {
"svg": lambda render_result: return isinstance(render_result, str)
"svg": lambda render_result: isinstance(render_result, str)
}
render_test(env, custom_tests=custom_tests)
render_test(env_func, custom_tests=custom_tests)
```
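The `custom_tests` fix above (dropping `return` from the lambda) is needed because a lambda body must be a single expression; `return` is a statement and raises a `SyntaxError` inside a lambda. A minimal sketch:

```python
# A lambda implicitly returns its expression, so no `return` is needed.
check_svg = lambda render_result: isinstance(render_result, str)

print(check_svg("<svg/>"))  # True
print(check_svg(b"bytes"))  # False
```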

## Performance Benchmark Test
5 changes: 4 additions & 1 deletion docs/environments/atari.md
@@ -121,7 +121,10 @@ env = supersuit.frame_stack_v1(env, 4)
All the Atari environments have the following environment parameters:

``` python
<atari_game>.env(obs_type='rgb_image', full_action_space=True, max_cycles=100000, auto_rom_install_path=None)
# using space invaders as an example, but replace with any atari game
from pettingzoo.atari import space_invaders_v2
space_invaders_v2.env(obs_type='rgb_image', full_action_space=True, max_cycles=100000, auto_rom_install_path=None)
```

`obs_type`: There are three possible values for this parameter:
6 changes: 6 additions & 0 deletions docs/environments/third_party_envs.md
@@ -57,6 +57,12 @@ CookingZoo: a gym-cooking derivative to simulate a complex cooking environment.

A library for doing reinforcement learning using [Crazyflie](https://www.bitcraze.io/products/crazyflie-2-1/) drones.

### [DSSE: Drone Swarm Search Environment](https://github.com/pfeinsper/drone-swarm-search)
[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.3-blue)]()
![GitHub stars](https://img.shields.io/github/stars/pfeinsper/drone-swarm-search)

A single and multi-agent environment to train swarms of drones for maritime search.


### [PettingZoo Dilemma Envs](https://github.com/tianyu-z/pettingzoo_dilemma_envs)

21 changes: 14 additions & 7 deletions docs/index.md
@@ -78,11 +78,18 @@ The [AEC API](/api/aec/) supports sequential turn based environments, while the
Environments can be interacted with using a similar interface to [Gymnasium](https://gymnasium.farama.org):

```python
from pettingzoo.butterfly import knights_archers_zombies_v10
env = knights_archers_zombies_v10.env(render_mode="human")
env.reset(seed=42)
for agent in env.agent_iter():
observation, reward, termination, truncation, info = env.last()
action = policy(observation, agent)
env.step(action)
from pettingzoo.butterfly import knights_archers_zombies_v10
env = knights_archers_zombies_v10.env(render_mode="human")
env.reset(seed=42)

for agent in env.agent_iter():
observation, reward, termination, truncation, info = env.last()

if termination or truncation:
action = None
else:
# this is where you would insert your policy
action = env.action_space(agent).sample()

env.step(action)
```
8 changes: 4 additions & 4 deletions docs/tutorials/agilerl/DQN.md
@@ -612,10 +612,10 @@ Before we go any further in this tutorial, it would be helpful to define and set
# Define the network configuration
NET_CONFIG = {
"arch": "cnn", # Network architecture
"h_size": [64, 64], # Actor hidden size
"c_size": [128], # CNN channel size
"k_size": [4], # CNN kernel size
"s_size": [1], # CNN stride size
"hidden_size": [64, 64], # Actor hidden size
"channel_size": [128], # CNN channel size
"kernel_size": [4], # CNN kernel size
"stride_size": [1], # CNN stride size
"normalize": False, # Normalize image from range [0,255] to [0,1]
}

2 changes: 1 addition & 1 deletion docs/tutorials/custom_environment/index.md
@@ -14,7 +14,7 @@ These tutorials walk you though the full process of creating a custom environmen

4. [Testing Your Environment](/tutorials/custom_environment/4-testing-your-environment.md)

For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/aec/) implementations, see our [Environment Creation](/content/environment_creation/) documentation.
For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/parallel/) implementations, see our [Environment Creation](/content/environment_creation/) documentation.


```{toctree}
2 changes: 1 addition & 1 deletion docs/tutorials/tianshou/index.md
@@ -21,7 +21,7 @@ It boasts a large number of algorithms and high quality software engineering sta

## Examples using PettingZoo

* [Multi-Agent RL](https://tianshou.readthedocs.io/en/master/tutorials/tictactoe.html)
* [Multi-Agent RL](https://tianshou.org/en/master/01_tutorials/04_tictactoe.html)

## Architecture

5 changes: 0 additions & 5 deletions pettingzoo/atari/maze_craze/maze_craze.py
@@ -85,7 +85,6 @@
"""

import os
import warnings
from glob import glob

from pettingzoo.atari.base_atari_env import (
@@ -102,10 +101,6 @@


def raw_env(game_version="robbers", visibilty_level=0, **kwargs):
if game_version == "robbers" and visibilty_level == 0:
warnings.warn(
"maze_craze has different versions of the game via the `game_version` argument, consider overriding."
)
assert (
game_version in avaliable_versions
), f"`game_version` parameter must be one of {avaliable_versions.keys()}"
4 changes: 2 additions & 2 deletions pettingzoo/butterfly/cooperative_pong/cooperative_pong.py
@@ -79,7 +79,7 @@
from pettingzoo.butterfly.cooperative_pong.manual_policy import ManualPolicy
from pettingzoo.butterfly.cooperative_pong.paddle import Paddle
from pettingzoo.utils import wrappers
from pettingzoo.utils.agent_selector import agent_selector
from pettingzoo.utils.agent_selector import AgentSelector
from pettingzoo.utils.conversions import parallel_wrapper_fn

FPS = 15
@@ -370,7 +370,7 @@ def __init__(self, **kwargs):

self.agents = self.env.agents[:]
self.possible_agents = self.agents[:]
self._agent_selector = agent_selector(self.agents)
self._agent_selector = AgentSelector(self.agents)
self.agent_selection = self._agent_selector.reset()
# spaces
self.action_spaces = dict(zip(self.agents, self.env.action_space))
@@ -132,6 +132,7 @@
vector_state=True,
use_typemasks=False,
sequence_space=False,
)
```
`spawn_rate`: how many cycles before a new zombie is spawned. A lower number means zombies are spawned at a higher rate.
@@ -193,7 +194,7 @@
from pettingzoo.butterfly.knights_archers_zombies.src.players import Archer, Knight
from pettingzoo.butterfly.knights_archers_zombies.src.weapons import Arrow, Sword
from pettingzoo.butterfly.knights_archers_zombies.src.zombie import Zombie
from pettingzoo.utils import agent_selector, wrappers
from pettingzoo.utils import AgentSelector, wrappers
from pettingzoo.utils.conversions import parallel_wrapper_fn

sys.dont_write_bytecode = True
@@ -369,7 +370,7 @@ def __init__(
self.floor_patch3 = get_image(os.path.join("img", "patch3.png"))
self.floor_patch4 = get_image(os.path.join("img", "patch4.png"))

self._agent_selector = agent_selector(self.agents)
self._agent_selector = AgentSelector(self.agents)
self.reinit()

def observation_space(self, agent):
4 changes: 2 additions & 2 deletions pettingzoo/butterfly/pistonball/pistonball.py
@@ -89,7 +89,7 @@

from pettingzoo import AECEnv
from pettingzoo.butterfly.pistonball.manual_policy import ManualPolicy
from pettingzoo.utils import agent_selector, wrappers
from pettingzoo.utils import AgentSelector, wrappers
from pettingzoo.utils.conversions import parallel_wrapper_fn

_image_library = {}
@@ -180,7 +180,7 @@ def __init__(
self.agents = ["piston_" + str(r) for r in range(self.n_pistons)]
self.possible_agents = self.agents[:]
self.agent_name_mapping = dict(zip(self.agents, list(range(self.n_pistons))))
self._agent_selector = agent_selector(self.agents)
self._agent_selector = AgentSelector(self.agents)

self.observation_spaces = dict(
zip(