From e38bb1021c74104204c065f5892908d6d0eef775 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:52:41 -0800 Subject: [PATCH 1/9] add open sheet parameter (#654) --- src/poke_env/player/env_player.py | 4 ++++ src/poke_env/player/openai_api.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/poke_env/player/env_player.py b/src/poke_env/player/env_player.py index 7f32b659e..61c0f810f 100644 --- a/src/poke_env/player/env_player.py +++ b/src/poke_env/player/env_player.py @@ -32,6 +32,7 @@ def __init__( save_replays: Union[bool, str] = False, server_configuration: Optional[ServerConfiguration] = None, start_listening: bool = True, + accept_open_team_sheet: Optional[bool] = False, start_timer_on_battle_start: bool = False, ping_interval: Optional[float] = 20.0, ping_timeout: Optional[float] = 20.0, @@ -63,6 +64,8 @@ def __init__( :param start_listening: Whether to start listening to the server. Defaults to True. :type start_listening: bool + :param accept_open_team_sheet: Whether to automatically start the battle with + open team sheets on. Defaults to False. :param start_timer_on_battle_start: Whether to automatically start the battle timer on battle start. Defaults to False. :type start_timer_on_battle_start: bool @@ -100,6 +103,7 @@ def __init__( save_replays=save_replays, server_configuration=server_configuration, start_listening=start_listening, + accept_open_team_sheet=accept_open_team_sheet, start_timer_on_battle_start=start_timer_on_battle_start, team=team, ping_interval=ping_interval, diff --git a/src/poke_env/player/openai_api.py b/src/poke_env/player/openai_api.py index 563d7677d..19270c3d5 100644 --- a/src/poke_env/player/openai_api.py +++ b/src/poke_env/player/openai_api.py @@ -118,6 +118,7 @@ def __init__( server_configuration: Optional[ ServerConfiguration ] = LocalhostServerConfiguration, + accept_open_team_sheet: Optional[bool] = False, start_timer_on_battle_start: bool = False, start_listening: bool = True, ping_interval: Optional[float] = 20.0, @@ -148,6 +149,8 @@ def __init__( :param start_listening: Whether to start listening to the server. Defaults to True. :type start_listening: bool + :param accept_open_team_sheet: Whether to automatically start the battle with + open team sheets on. Defaults to False. :param start_timer_on_battle_start: Whether to automatically start the battle timer on battle start. Defaults to False. :type start_timer_on_battle_start: bool @@ -177,6 +180,7 @@ def __init__( max_concurrent_battles=1, save_replays=save_replays, server_configuration=server_configuration, + accept_open_team_sheet=accept_open_team_sheet, start_timer_on_battle_start=start_timer_on_battle_start, start_listening=start_listening, ping_interval=ping_interval, From 0c1430d0b7a43321e564635eff122b15db56cca5 Mon Sep 17 00:00:00 2001 From: Haris Sahovic Date: Sun, 15 Dec 2024 02:56:33 +0100 Subject: [PATCH 2/9] Up version to 0.8.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 22d26e899..3c4e6d563 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "poke_env" -version = "0.8.2" +version = "0.8.3" description = "A python interface for training Reinforcement Learning bots to battle on pokemon showdown." readme = "README.md" requires-python = ">=3.9.0" From 68e550ade6413851a35d567eb7281e642479f23e Mon Sep 17 00:00:00 2001 From: Akash Patel <17132214+acxz@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:13:57 -0500 Subject: [PATCH 3/9] remove openai branding, update gym terminology with gymnasium (#649) --- docs/source/examples/index.rst | 2 +- ...pper.rst => rl_with_gymnasium_wrapper.rst} | 20 +++++++++---------- docs/source/getting_started.rst | 2 +- docs/source/index.rst | 2 +- docs/source/modules/player.rst | 4 ++-- ...openai_example.py => gymnasium_example.py} | 20 +++++++++---------- ..._gym_wrapper.py => rl_with_gym_wrapper.py} | 0 ...rapper.py => rl_with_gymnasium_wrapper.py} | 2 +- integration_tests/test_env_player.py | 12 +++++------ src/poke_env/player/__init__.py | 8 ++++---- src/poke_env/player/env_player.py | 8 ++++---- .../{openai_api.py => gymnasium_api.py} | 14 ++++++------- unit_tests/player/test_env_player.py | 12 +++++------ .../{test_openai.py => test_gymnasium.py} | 6 +++--- 14 files changed, 56 insertions(+), 56 deletions(-) rename docs/source/examples/{rl_with_open_ai_gym_wrapper.rst => rl_with_gymnasium_wrapper.rst} (93%) rename examples/{openai_example.py => gymnasium_example.py} (90%) rename examples/{rl_with_open_ai_gym_wrapper.py => rl_with_gym_wrapper.py} (100%) rename examples/{rl_with_new_open_ai_gym_wrapper.py => rl_with_gymnasium_wrapper.py} (99%) rename src/poke_env/player/{openai_api.py => gymnasium_api.py} (98%) rename unit_tests/player/{test_openai.py => test_gymnasium.py} (96%) diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst index a953cc4be..145014727 100644 --- a/docs/source/examples/index.rst +++ b/docs/source/examples/index.rst @@ -11,4 +11,4 @@ This page lists detailled examples demonstrating how to use this package. They a quickstart using_a_custom_teambuilder connecting_to_showdown_and_challenging_humans - rl_with_open_ai_gym_wrapper \ No newline at end of file + rl_with_gymnasium_wrapper diff --git a/docs/source/examples/rl_with_open_ai_gym_wrapper.rst b/docs/source/examples/rl_with_gymnasium_wrapper.rst similarity index 93% rename from docs/source/examples/rl_with_open_ai_gym_wrapper.rst rename to docs/source/examples/rl_with_gymnasium_wrapper.rst index feeef43bf..0f7a970ad 100644 --- a/docs/source/examples/rl_with_open_ai_gym_wrapper.rst +++ b/docs/source/examples/rl_with_gymnasium_wrapper.rst @@ -1,18 +1,18 @@ -.. _rl_with_open_ai_gym_wrapper: +.. _rl_with_gymnasium_wrapper: -Reinforcement learning with the OpenAI Gym wrapper +Reinforcement learning with the Gymnasium wrapper ================================================== -The corresponding complete source code can be found `here `__. +The corresponding complete source code can be found `here `__. -The goal of this example is to demonstrate how to use the `open ai gym `__ interface proposed by ``EnvPlayer``, and to train a simple deep reinforcement learning agent comparable in performance to the ``MaxDamagePlayer`` we created in :ref:`max_damage_player`. +The goal of this example is to demonstrate how to use the `farama gymnasium `__ interface proposed by ``EnvPlayer``, and to train a simple deep reinforcement learning agent comparable in performance to the ``MaxDamagePlayer`` we created in :ref:`max_damage_player`. -.. note:: This example necessitates `keras-rl `__ (compatible with Tensorflow 1.X) or `keras-rl2 `__ (Tensorflow 2.X), which implement numerous reinforcement learning algorithms and offer a simple API fully compatible with the Open AI Gym API. You can install them by running ``pip install keras-rl`` or ``pip install keras-rl2``. If you are unsure, ``pip install keras-rl2`` is recommended. +.. note:: This example necessitates `keras-rl `__ (compatible with Tensorflow 1.X) or `keras-rl2 `__ (Tensorflow 2.X), which implement numerous reinforcement learning algorithms and offer a simple API fully compatible with the Gymnasium API. You can install them by running ``pip install keras-rl`` or ``pip install keras-rl2``. If you are unsure, ``pip install keras-rl2`` is recommended. Implementing rewards and observations ************************************* -The open ai gym API provides *rewards* and *observations* for each step of each episode. In our case, each step corresponds to one decision in a battle and battles correspond to episodes. +The Gymnasium API provides *rewards* and *observations* for each step of each episode. In our case, each step corresponds to one decision in a battle and battles correspond to episodes. Defining observations ^^^^^^^^^^^^^^^^^^^^^ @@ -26,9 +26,9 @@ Observations are embeddings of the current state of the battle. They can be an a To define our observations, we will create a custom ``embed_battle`` method. It takes one argument, a ``Battle`` object, and returns our embedding. -In addition to this, we also need to describe the embedding to the gym interface. +In addition to this, we also need to describe the embedding to the gymnasium interface. To achieve this, we need to implement the ``describe_embedding`` method where we specify the low bound and the high bound -for each component of the embedding vector and return them as a ``gym.Space`` object. +for each component of the embedding vector and return them as a ``gymnasium.Space`` object. Defining rewards ^^^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ Our player will play the ``gen8randombattle`` format. We can therefore inherit f Instantiating and testing a player ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Now that our custom class is defined, we can instantiate our RL player and test if it's compliant with the OpenAI gym API. +Now that our custom class is defined, we can instantiate our RL player and test if it's compliant with the Gymnasium API. .. code-block:: python @@ -340,7 +340,7 @@ To use the ``cross_evaluate`` method, the strategy is the same to the one used f Final result ************ -Running the `whole file `__ should take a couple of minutes and print something similar to this: +Running the `whole file `__ should take a couple of minutes and print something similar to this: .. code-block:: console diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index c3dcd2be9..3042f9a42 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -41,7 +41,7 @@ Agents in ``poke-env`` are instances of the ``Player`` class. Explore the follow - Basic agent: :ref:`/examples/cross_evaluate_random_players.ipynb` - Advanced agent: :ref:`max_damage_player` -- RL agent: :ref:`rl_with_open_ai_gym_wrapper` +- RL agent: :ref:`rl_with_gymnasium_wrapper` - Using teams: :ref:`ou_max_player` - Custom team builder: :ref:`using_a_custom_teambuilder` diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fe84f098..3711cade9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,7 +6,7 @@ Poke-env: A Python Interface for Training Reinforcement Learning Pokémon Bots Poke-env provides an environment for engaging in `Pokémon Showdown `__ battles with a focus on reinforcement learning. -It boasts a straightforward API for handling Pokémon, Battles, Moves, and other battle-centric objects, alongside an `OpenAI Gym `__ interface for training agents. +It boasts a straightforward API for handling Pokémon, Battles, Moves, and other battle-centric objects, alongside a `Farama Gymnasium `__ interface for training agents. .. attention:: While poke-env aims to support all Pokémon generations, it was primarily developed with the latest generations in mind. If you discover any missing or incorrect functionalities for earlier generations, please `open an issue `__ to help improve the library. diff --git a/docs/source/modules/player.rst b/docs/source/modules/player.rst index eb3e0381e..75b0ee969 100644 --- a/docs/source/modules/player.rst +++ b/docs/source/modules/player.rst @@ -21,10 +21,10 @@ Player :undoc-members: :show-inheritance: -OpenAIGymEnv +GymnasiumEnv ************ -.. automodule:: poke_env.player.openai_api +.. automodule:: poke_env.player.gymnasium_api :members: :undoc-members: :show-inheritance: diff --git a/examples/openai_example.py b/examples/gymnasium_example.py similarity index 90% rename from examples/openai_example.py rename to examples/gymnasium_example.py index b14e634c8..276e325fd 100644 --- a/examples/openai_example.py +++ b/examples/gymnasium_example.py @@ -7,13 +7,13 @@ from poke_env.environment.abstract_battle import AbstractBattle from poke_env.player import ( Gen8EnvSinglePlayer, + GymnasiumEnv, ObservationType, - OpenAIGymEnv, RandomPlayer, ) -class TestEnv(OpenAIGymEnv): +class TestEnv(GymnasiumEnv): def __init__(self, **kwargs): self.opponent = RandomPlayer( battle_format="gen8randombattle", @@ -66,14 +66,14 @@ def describe_embedding(self) -> Space: return Box(np.array([0, 0]), np.array([6, 6]), dtype=int) -def openai_api(): - gym_env = TestEnv( +def gymnasium_api(): + gymnasium_env = TestEnv( battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, start_challenging=True, ) - check_env(gym_env) - gym_env.close() + check_env(gymnasium_env) + gymnasium_env.close() def env_player(): @@ -81,16 +81,16 @@ def env_player(): battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, ) - gym_env = Gen8( + gymnasium_env = Gen8( battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, start_challenging=True, opponent=opponent, ) - check_env(gym_env) - gym_env.close() + check_env(gymnasium_env) + gymnasium_env.close() if __name__ == "__main__": - openai_api() + gymnasium_api() env_player() diff --git a/examples/rl_with_open_ai_gym_wrapper.py b/examples/rl_with_gym_wrapper.py similarity index 100% rename from examples/rl_with_open_ai_gym_wrapper.py rename to examples/rl_with_gym_wrapper.py diff --git a/examples/rl_with_new_open_ai_gym_wrapper.py b/examples/rl_with_gymnasium_wrapper.py similarity index 99% rename from examples/rl_with_new_open_ai_gym_wrapper.py rename to examples/rl_with_gymnasium_wrapper.py index 7bcf23d9e..9248b7edc 100644 --- a/examples/rl_with_new_open_ai_gym_wrapper.py +++ b/examples/rl_with_gymnasium_wrapper.py @@ -72,7 +72,7 @@ def describe_embedding(self) -> Space: async def main(): # First test the environment to ensure the class is consistent - # with the OpenAI API + # with the Gymnasium API opponent = RandomPlayer(battle_format="gen8randombattle") test_env = SimpleRLPlayer( battle_format="gen8randombattle", start_challenging=True, opponent=opponent diff --git a/integration_tests/test_env_player.py b/integration_tests/test_env_player.py index a7ef4ba68..cfc761926 100644 --- a/integration_tests/test_env_player.py +++ b/integration_tests/test_env_player.py @@ -90,7 +90,7 @@ def play_function(player, n_battles): @pytest.mark.timeout(30) -def test_random_gym_players_gen4(): +def test_random_gymnasium_players_gen4(): random_player = RandomPlayer(battle_format="gen4randombattle", log_level=25) env_player = RandomGen4EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -100,7 +100,7 @@ def test_random_gym_players_gen4(): @pytest.mark.timeout(30) -def test_random_gym_players_gen5(): +def test_random_gymnasium_players_gen5(): random_player = RandomPlayer(battle_format="gen5randombattle", log_level=25) env_player = RandomGen5EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -110,7 +110,7 @@ def test_random_gym_players_gen5(): @pytest.mark.timeout(30) -def test_random_gym_players_gen6(): +def test_random_gymnasium_players_gen6(): random_player = RandomPlayer(battle_format="gen6randombattle", log_level=25) env_player = RandomGen6EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -120,7 +120,7 @@ def test_random_gym_players_gen6(): @pytest.mark.timeout(30) -def test_random_gym_players_gen7(): +def test_random_gymnasium_players_gen7(): random_player = RandomPlayer(battle_format="gen7randombattle", log_level=25) env_player = RandomGen7EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -130,7 +130,7 @@ def test_random_gym_players_gen7(): @pytest.mark.timeout(30) -def test_random_gym_players_gen8(): +def test_random_gymnasium_players_gen8(): random_player = RandomPlayer(battle_format="gen8randombattle", log_level=25) env_player = RandomGen8EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -140,7 +140,7 @@ def test_random_gym_players_gen8(): @pytest.mark.timeout(30) -def test_random_gym_players_gen9(): +def test_random_gymnasium_players_gen9(): random_player = RandomPlayer(battle_format="gen9randombattle", log_level=25) env_player = RandomGen9EnvPlayer( log_level=25, opponent=random_player, start_challenging=False diff --git a/src/poke_env/player/__init__.py b/src/poke_env/player/__init__.py index 0f88467cc..b364bcec8 100644 --- a/src/poke_env/player/__init__.py +++ b/src/poke_env/player/__init__.py @@ -2,7 +2,7 @@ """ from poke_env.concurrency import POKE_LOOP -from poke_env.player import env_player, openai_api, player, random_player, utils +from poke_env.player import env_player, gymnasium_api, player, random_player, utils from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer from poke_env.player.battle_order import ( BattleOrder, @@ -19,7 +19,7 @@ Gen8EnvSinglePlayer, Gen9EnvSinglePlayer, ) -from poke_env.player.openai_api import ActType, ObsType, OpenAIGymEnv +from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType from poke_env.player.player import Player from poke_env.player.random_player import RandomPlayer from poke_env.player.utils import ( @@ -32,7 +32,7 @@ __all__ = [ "env_player", - "openai_api", + "gymnasium_api", "player", "random_player", "utils", @@ -47,7 +47,7 @@ "Gen8EnvSinglePlayer", "Gen9EnvSinglePlayer", "POKE_LOOP", - "OpenAIGymEnv", + "GymnasiumEnv", "PSClient", "Player", "RandomPlayer", diff --git a/src/poke_env/player/env_player.py b/src/poke_env/player/env_player.py index 61c0f810f..fb4ce7803 100644 --- a/src/poke_env/player/env_player.py +++ b/src/poke_env/player/env_player.py @@ -1,4 +1,4 @@ -"""This module defines a player class exposing the Open AI Gym API with utility functions. +"""This module defines a player class exposing the Gymnasium API with utility functions. """ from abc import ABC @@ -8,15 +8,15 @@ from poke_env.environment.abstract_battle import AbstractBattle from poke_env.player.battle_order import BattleOrder, ForfeitBattleOrder -from poke_env.player.openai_api import ActType, ObsType, OpenAIGymEnv +from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType from poke_env.player.player import Player from poke_env.ps_client.account_configuration import AccountConfiguration from poke_env.ps_client.server_configuration import ServerConfiguration from poke_env.teambuilder.teambuilder import Teambuilder -class EnvPlayer(OpenAIGymEnv[ObsType, ActType], ABC): - """Player exposing the Open AI Gym Env API.""" +class EnvPlayer(GymnasiumEnv[ObsType, ActType], ABC): + """Player exposing the Gymnasium Env API.""" _ACTION_SPACE: List[int] = [] _DEFAULT_BATTLE_FORMAT = "gen8randombattle" diff --git a/src/poke_env/player/openai_api.py b/src/poke_env/player/gymnasium_api.py similarity index 98% rename from src/poke_env/player/openai_api.py rename to src/poke_env/player/gymnasium_api.py index 19270c3d5..72a585d64 100644 --- a/src/poke_env/player/openai_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -1,4 +1,4 @@ -"""This module defines a player class with the OpenAI API on the main thread. +"""This module defines a player class with the Gymnasium API on the main thread. For a black-box implementation consider using the module env_player. """ @@ -62,7 +62,7 @@ class _AsyncPlayer(Generic[ObsType, ActType], Player): def __init__( self, - user_funcs: OpenAIGymEnv[ObsType, ActType], + user_funcs: GymnasiumEnv[ObsType, ActType], username: str, **kwargs: Any, ): @@ -94,12 +94,12 @@ def _battle_finished_callback(self, battle: AbstractBattle): asyncio.run_coroutine_threadsafe(self.observations.async_put(to_put), POKE_LOOP) -class OpenAIGymEnv( +class GymnasiumEnv( Env[ObsType, ActType], ABC, ): """ - Base class implementing the OpenAI Gym API on the main thread. + Base class implementing the Gymnasium API on the main thread. """ _INIT_RETRIES = 100 @@ -239,7 +239,7 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: def embed_battle(self, battle: AbstractBattle) -> ObsType: """ Returns the embedding of the current battle state in a format compatible with - the OpenAI gym API. + the Gymnasium API. :param battle: The current battle state. :type battle: AbstractBattle @@ -416,7 +416,7 @@ def close(self, purge: bool = True): def background_send_challenge(self, username: str): """ Sends a single challenge specified player. The function immediately returns - to allow use of the OpenAI gym API. + to allow use of the Gymnasium API. :param username: The username of the player to challenge. :type username: str @@ -434,7 +434,7 @@ def background_send_challenge(self, username: str): def background_accept_challenge(self, username: str): """ Accepts a single challenge specified player. The function immediately returns - to allow use of the OpenAI gym API. + to allow use of the Gymnasium API. :param username: The username of the player to challenge. :type username: str diff --git a/unit_tests/player/test_env_player.py b/unit_tests/player/test_env_player.py index b079de6b4..56b77b1f4 100644 --- a/unit_tests/player/test_env_player.py +++ b/unit_tests/player/test_env_player.py @@ -19,7 +19,7 @@ Gen9EnvSinglePlayer, RandomPlayer, ) -from poke_env.player.openai_api import _AsyncPlayer +from poke_env.player.gymnasium_api import _AsyncPlayer account_configuration = AccountConfiguration("username", "password") server_configuration = ServerConfiguration("server.url", "auth.url") @@ -42,15 +42,15 @@ def embed_battle(self, battle): def test_init(): - gym_env = CustomEnvPlayer( + gymnasium_env = CustomEnvPlayer( None, account_configuration=account_configuration, server_configuration=server_configuration, start_listening=False, battle_format="gen7randombattles", ) - player = gym_env.agent - assert isinstance(gym_env, CustomEnvPlayer) + player = gymnasium_env.agent + assert isinstance(gymnasium_env, CustomEnvPlayer) assert isinstance(player, _AsyncPlayer) @@ -60,11 +60,11 @@ async def __call__(self, *args, **kwargs): @patch( - "poke_env.player.openai_api._AsyncQueue.async_get", + "poke_env.player.gymnasium_api._AsyncQueue.async_get", return_value=2, new_callable=AsyncMock, ) -@patch("poke_env.player.openai_api._AsyncQueue.async_put", new_callable=AsyncMock) +@patch("poke_env.player.gymnasium_api._AsyncQueue.async_put", new_callable=AsyncMock) def test_choose_move(queue_put_mock, queue_get_mock): player = CustomEnvPlayer( None, diff --git a/unit_tests/player/test_openai.py b/unit_tests/player/test_gymnasium.py similarity index 96% rename from unit_tests/player/test_openai.py rename to unit_tests/player/test_gymnasium.py index 40e9ec82a..e40c0fd7d 100644 --- a/unit_tests/player/test_openai.py +++ b/unit_tests/player/test_gymnasium.py @@ -10,14 +10,14 @@ ActType, BattleOrder, ForfeitBattleOrder, + GymnasiumEnv, ObsType, - OpenAIGymEnv, Player, ) -from poke_env.player.openai_api import _AsyncPlayer, _AsyncQueue +from poke_env.player.gymnasium_api import _AsyncPlayer, _AsyncQueue -class DummyEnv(OpenAIGymEnv[ObsType, ActType]): +class DummyEnv(GymnasiumEnv[ObsType, ActType]): def __init__(self, *args, **kwargs): self.opponent = None super().__init__(*args, **kwargs) From 2f98f418426f8a10c15b9197b39b49b80852b1dc Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:14:20 -0800 Subject: [PATCH 4/9] open_timeout is configurable (#665) --- src/poke_env/player/env_player.py | 7 +++++++ src/poke_env/player/gymnasium_api.py | 7 +++++++ src/poke_env/player/player.py | 7 +++++++ src/poke_env/ps_client/ps_client.py | 8 ++++++++ 4 files changed, 29 insertions(+) diff --git a/src/poke_env/player/env_player.py b/src/poke_env/player/env_player.py index fb4ce7803..d7526f622 100644 --- a/src/poke_env/player/env_player.py +++ b/src/poke_env/player/env_player.py @@ -34,6 +34,7 @@ def __init__( start_listening: bool = True, accept_open_team_sheet: Optional[bool] = False, start_timer_on_battle_start: bool = False, + open_timeout: Optional[float] = 10.0, ping_interval: Optional[float] = 20.0, ping_timeout: Optional[float] = 20.0, team: Optional[Union[str, Teambuilder]] = None, @@ -69,6 +70,11 @@ def __init__( :param start_timer_on_battle_start: Whether to automatically start the battle timer on battle start. Defaults to False. :type start_timer_on_battle_start: bool + :param open_timeout: How long to wait for a timeout when connecting the socket + (important for backend websockets. + Increase only if timeouts occur during runtime). + If None connect will never time out. + :type open_timeout: float, optional :param ping_interval: How long between keepalive pings (Important for backend websockets). If None, disables keepalive entirely. :type ping_interval: float, optional @@ -106,6 +112,7 @@ def __init__( accept_open_team_sheet=accept_open_team_sheet, start_timer_on_battle_start=start_timer_on_battle_start, team=team, + open_timeout=open_timeout, ping_interval=ping_interval, ping_timeout=ping_timeout, start_challenging=start_challenging, diff --git a/src/poke_env/player/gymnasium_api.py b/src/poke_env/player/gymnasium_api.py index 72a585d64..c10793e6b 100644 --- a/src/poke_env/player/gymnasium_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -121,6 +121,7 @@ def __init__( accept_open_team_sheet: Optional[bool] = False, start_timer_on_battle_start: bool = False, start_listening: bool = True, + open_timeout: Optional[float] = 10.0, ping_interval: Optional[float] = 20.0, ping_timeout: Optional[float] = 20.0, team: Optional[Union[str, Teambuilder]] = None, @@ -154,6 +155,11 @@ def __init__( :param start_timer_on_battle_start: Whether to automatically start the battle timer on battle start. Defaults to False. :type start_timer_on_battle_start: bool + :param open_timeout: How long to wait for a timeout when connecting the socket + (important for backend websockets. + Increase only if timeouts occur during runtime). + If None connect will never time out. + :type open_timeout: float, optional :param ping_interval: How long between keepalive pings (Important for backend websockets). If None, disables keepalive entirely. :type ping_interval: float, optional @@ -183,6 +189,7 @@ def __init__( accept_open_team_sheet=accept_open_team_sheet, start_timer_on_battle_start=start_timer_on_battle_start, start_listening=start_listening, + open_timeout=open_timeout, ping_interval=ping_interval, ping_timeout=ping_timeout, team=team, diff --git a/src/poke_env/player/player.py b/src/poke_env/player/player.py index 302b00d81..2093f3491 100644 --- a/src/poke_env/player/player.py +++ b/src/poke_env/player/player.py @@ -61,6 +61,7 @@ def __init__( server_configuration: Optional[ServerConfiguration] = None, start_timer_on_battle_start: bool = False, start_listening: bool = True, + open_timeout: Optional[float] = 10.0, ping_interval: Optional[float] = 20.0, ping_timeout: Optional[float] = 20.0, team: Optional[Union[str, Teambuilder]] = None, @@ -93,6 +94,11 @@ def __init__( :param start_listening: Whether to start listening to the server. Defaults to True. :type start_listening: bool + :param open_timeout: How long to wait for a timeout when connecting the socket + (important for backend websockets. + Increase only if timeouts occur during runtime). + If None connect will never time out. + :type open_timeout: float, optional :param ping_interval: How long between keepalive pings (Important for backend websockets). If None, disables keepalive entirely. :type ping_interval: float, optional @@ -121,6 +127,7 @@ def __init__( log_level=log_level, server_configuration=server_configuration, start_listening=start_listening, + open_timeout=open_timeout, ping_interval=ping_interval, ping_timeout=ping_timeout, ) diff --git a/src/poke_env/ps_client/ps_client.py b/src/poke_env/ps_client/ps_client.py index 601592877..8199b7da6 100644 --- a/src/poke_env/ps_client/ps_client.py +++ b/src/poke_env/ps_client/ps_client.py @@ -41,6 +41,7 @@ def __init__( log_level: Optional[int] = None, server_configuration: ServerConfiguration, start_listening: bool = True, + open_timeout: Optional[float] = 10.0, ping_interval: Optional[float] = 20.0, ping_timeout: Optional[float] = 20.0, ): @@ -56,6 +57,11 @@ def __init__( :param start_listening: Whether to start listening to the server. Defaults to True. :type start_listening: bool + :param open_timeout: How long to wait for a timeout when connecting the socket + (important for backend websockets. + Increase only if timeouts occur during runtime). + If None connect will never time out. + :type open_timeout: float, optional :param ping_interval: How long between keepalive pings (Important for backend websockets). If None, disables keepalive entirely. :type ping_interval: float, optional @@ -66,6 +72,7 @@ def __init__( :type ping_timeout: float, optional """ self._active_tasks: Set[Any] = set() + self._open_timeout = open_timeout self._ping_interval = ping_interval self._ping_timeout = ping_timeout @@ -210,6 +217,7 @@ async def listen(self): async with ws.connect( self.websocket_url, max_queue=None, + open_timeout=self._open_timeout, ping_interval=self._ping_interval, ping_timeout=self._ping_timeout, ) as websocket: From 128a9437bc4abd1f2c3716c098b9d204d33ad199 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Thu, 2 Jan 2025 18:55:23 -0500 Subject: [PATCH 5/9] Revival Blessing Bugfix (#667) --- src/poke_env/environment/pokemon.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/poke_env/environment/pokemon.py b/src/poke_env/environment/pokemon.py index 35387fa36..7d9b260d5 100644 --- a/src/poke_env/environment/pokemon.py +++ b/src/poke_env/environment/pokemon.py @@ -276,6 +276,8 @@ def forme_change(self, species: str): def heal(self, hp_status: str): self.set_hp_status(hp_status) + if self.fainted: + self._status = None def invert_boosts(self): self._boosts = {k: -v for k, v in self._boosts.items()} From 9b5556e42beca9fe2dde6203fe8403e2f1c8a9d2 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Thu, 2 Jan 2025 19:08:31 -0500 Subject: [PATCH 6/9] Improved `battle_against` API (#657) * improved api * fix test * update docstring * make the API more consistent * unused import * whoops, forgot to fix this signature * accidental extra diff * unused import * remove unnecessary part --- src/poke_env/player/gymnasium_api.py | 4 +-- src/poke_env/player/player.py | 35 +++++++++++++++------------ src/poke_env/player/utils.py | 33 ++++++++----------------- unit_tests/player/test_player_misc.py | 24 ++++++++---------- 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/src/poke_env/player/gymnasium_api.py b/src/poke_env/player/gymnasium_api.py index c10793e6b..7be39f6b3 100644 --- a/src/poke_env/player/gymnasium_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -465,7 +465,7 @@ async def _challenge_loop( while self._keep_challenging: opponent = self._get_opponent() if isinstance(opponent, Player): - await self.agent.battle_against(opponent, 1) + await self.agent.battle_against(opponent, n_battles=1) else: await self.agent.send_challenges(opponent, 1) if callback and self.current_battle is not None: @@ -474,7 +474,7 @@ async def _challenge_loop( for _ in range(n_challenges): opponent = self._get_opponent() if isinstance(opponent, Player): - await self.agent.battle_against(opponent, 1) + await self.agent.battle_against(opponent, n_battles=1) else: await self.agent.send_challenges(opponent, 1) if callback and self.current_battle is not None: diff --git a/src/poke_env/player/player.py b/src/poke_env/player/player.py index 2093f3491..fd26f9eb7 100644 --- a/src/poke_env/player/player.py +++ b/src/poke_env/player/player.py @@ -1,6 +1,8 @@ """This module defines a base class for players. """ +from __future__ import annotations + import asyncio import random from abc import ABC, abstractmethod @@ -699,28 +701,31 @@ async def _ladder(self, n_games: int): perf_counter() - start_time, ) - async def battle_against(self, opponent: "Player", n_battles: int = 1): - """Make the player play n_battles against opponent. + async def battle_against(self, *opponents: Player, n_battles: int = 1): + """Make the player play n_battles against the given opponents. - This function is a wrapper around send_challenges and accept challenges. + This function is a wrapper around send_challenges and accept_challenges. - :param opponent: The opponent to play against. - :type opponent: Player + :param opponents: The opponents to play against. + :type opponents: Player :param n_battles: The number of games to play. Defaults to 1. :type n_battles: int """ - await handle_threaded_coroutines(self._battle_against(opponent, n_battles)) - - async def _battle_against(self, opponent: "Player", n_battles: int): - await asyncio.gather( - self.send_challenges( - to_id_str(opponent.username), - n_battles, - to_wait=opponent.ps_client.logged_in, - ), - opponent.accept_challenges(to_id_str(self.username), n_battles), + await handle_threaded_coroutines( + self._battle_against(*opponents, n_battles=n_battles) ) + async def _battle_against(self, *opponents: Player, n_battles: int): + for opponent in opponents: + await asyncio.gather( + self.send_challenges( + to_id_str(opponent.username), + n_battles, + to_wait=opponent.ps_client.logged_in, + ), + opponent.accept_challenges(to_id_str(self.username), n_battles), + ) + async def send_challenges( self, opponent: str, n_challenges: int, to_wait: Optional[Event] = None ): diff --git a/src/poke_env/player/utils.py b/src/poke_env/player/utils.py index e2d9dc24d..3dd9c4256 100644 --- a/src/poke_env/player/utils.py +++ b/src/poke_env/player/utils.py @@ -7,7 +7,6 @@ from typing import Dict, List, Optional, Tuple from poke_env.concurrency import POKE_LOOP -from poke_env.data import to_id_str from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer from poke_env.player.player import Player from poke_env.player.random_player import RandomPlayer @@ -31,29 +30,17 @@ async def cross_evaluate( players: List[Player], n_challenges: int ) -> Dict[str, Dict[str, Optional[float]]]: results: Dict[str, Dict[str, Optional[float]]] = { - p_1.username: {p_2.username: None for p_2 in players} for p_1 in players + p1.username: {p2.username: None for p2 in players} for p1 in players } - for i, p_1 in enumerate(players): - for j, p_2 in enumerate(players): + for i, p1 in enumerate(players): + for j, p2 in enumerate(players): if j <= i: continue - await asyncio.gather( - p_1.send_challenges( - opponent=to_id_str(p_2.username), - n_challenges=n_challenges, - to_wait=p_2.ps_client.logged_in, - ), - p_2.accept_challenges( - opponent=to_id_str(p_1.username), - n_challenges=n_challenges, - packed_team=p_2.next_team, - ), - ) - results[p_1.username][p_2.username] = p_1.win_rate - results[p_2.username][p_1.username] = p_2.win_rate - - p_1.reset_battles() - p_2.reset_battles() + await p1.battle_against(p2, n_battles=n_challenges) + results[p1.username][p2.username] = p1.win_rate + results[p2.username][p1.username] = p2.win_rate + p1.reset_battles() + p2.reset_battles() return results @@ -170,7 +157,7 @@ async def evaluate_player( baselines = [p(max_concurrent_battles=n_battles) for p in _EVALUATION_RATINGS] # type: ignore for p in baselines: - await p.battle_against(player, n_placement_battles) + await p.battle_against(player, n_battles=n_placement_battles) # Select the best opponent for evaluation best_opp = min( @@ -179,7 +166,7 @@ async def evaluate_player( # Performing the main evaluation remaining_battles = n_battles - len(_EVALUATION_RATINGS) * n_placement_battles - await best_opp.battle_against(player, remaining_battles) + await best_opp.battle_against(player, n_battles=remaining_battles) return _estimate_strength_from_results( best_opp.n_finished_battles, diff --git a/unit_tests/player/test_player_misc.py b/unit_tests/player/test_player_misc.py index 07ef9e15e..9c86d35c4 100644 --- a/unit_tests/player/test_player_misc.py +++ b/unit_tests/player/test_player_misc.py @@ -1,8 +1,8 @@ -from collections import namedtuple from unittest.mock import MagicMock, patch import pytest +from poke_env import AccountConfiguration from poke_env.environment import AbstractBattle, Battle, DoubleBattle, Move, PokemonType from poke_env.player import BattleOrder, Player, RandomPlayer, cross_evaluate from poke_env.stats import _raw_hp, _raw_stat @@ -13,7 +13,13 @@ def choose_move(self, battle: AbstractBattle) -> BattleOrder: return self.choose_random_move(battle) -class FixedWinRatePlayer: +class FixedWinRatePlayer(Player): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def choose_move(self, battle: AbstractBattle) -> BattleOrder: + return self.choose_random_move(battle) + async def accept_challenges(self, *args, **kwargs): pass @@ -27,14 +33,6 @@ def reset_battles(self): def win_rate(self): return 0.5 - @property - def next_team(self): - return None - - @property - def ps_client(self): - return namedtuple("PSClient", "logged_in")(logged_in=None) - def test_player_default_order(): assert SimplePlayer().choose_default_move().message == "/choose default" @@ -208,11 +206,9 @@ async def test_basic_challenge_handling(): @pytest.mark.asyncio async def test_cross_evaluate(): - p1 = FixedWinRatePlayer() - p2 = FixedWinRatePlayer() + p1 = FixedWinRatePlayer(account_configuration=AccountConfiguration("p1", None)) + p2 = FixedWinRatePlayer(account_configuration=AccountConfiguration("p2", None)) - p1.username = "p1" - p2.username = "p2" cross_evaluation = await cross_evaluate([p1, p2], 10) assert cross_evaluation == { "p1": {"p1": None, "p2": 0.5}, From 5eaeedfabcde6d5548d6005185ee0c77afe75837 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Thu, 2 Jan 2025 19:29:36 -0500 Subject: [PATCH 7/9] PettingZoo Integration (#664) * adopting pettingzoo * fix tests * fixing tests * using staticness and removing dead stuff * fix test * fix test * fix test * format and fix tests * fix test * fix tests * fix test * get rid of seed usage for now * cleanup and bugfixes * try to appease mypy * hopefully stop complaining * now we should be good * a better way * and this * ? * format * simplify * unused import * log * aligning diff a bit more * handle wait situation * better parity with previous version * try this, it looks sus though * log * try this maybe idk * more leeway * try this * debugging * this should give a nice runtime improvement * assign possible_agents * following API rules * fix bugs * more bugfixing * bugfix * remove clutter * tinkering * more tinkering * add timeout for action-putting as well * debugging * just need to fix close() now I think * better type ignore * symmetry * log * log * experiment * tinkering * attempt fix * let's try sticking to the basics * log * log * just kill it * log * confused * relax * tinkering more * more logs * try this * bugfixing * simplifications, and better action-putting logic * more logging and slight improvement * trying to make battle tracking better * bugfix * logs * better coordination * log * adjustments * furthering consistency * faster * trying to solve battle reference bug * investigating * experiment * more sussing * fix * hmm * this is sus bro * prints * better * im going crazy * debugging * debugging * debugging * diagnosis * bugfixing * this is a scary bug jeez * fix * bugfix * remove insane amount of prints lol * more cleanup, put back the forfeit thing * more cleanup * back to the old timeout * cleanup * staying faithful to original logic * safer * cleaning diff * timeout go back down * you can give 2 account configurations * bugfix --- integration_tests/test_env_player.py | 94 ++---- requirements.txt | 1 + src/poke_env/player/__init__.py | 6 +- src/poke_env/player/env_player.py | 112 +++----- src/poke_env/player/gymnasium_api.py | 416 +++++++++++++-------------- unit_tests/player/test_env_player.py | 73 ++--- unit_tests/player/test_gymnasium.py | 32 +-- 7 files changed, 296 insertions(+), 438 deletions(-) diff --git a/integration_tests/test_env_player.py b/integration_tests/test_env_player.py index cfc761926..aa5c74177 100644 --- a/integration_tests/test_env_player.py +++ b/integration_tests/test_env_player.py @@ -1,7 +1,7 @@ import numpy as np import pytest from gymnasium.spaces import Box, Space -from gymnasium.utils.env_checker import check_env +from pettingzoo.test.parallel_test import parallel_api_test from poke_env.player import ( Gen4EnvSinglePlayer, @@ -10,7 +10,6 @@ Gen7EnvSinglePlayer, Gen8EnvSinglePlayer, Gen9EnvSinglePlayer, - RandomPlayer, ) @@ -80,81 +79,61 @@ def embed_battle(self, battle): return np.array([0]) -def play_function(player, n_battles): +def play_function(env, n_battles): for _ in range(n_battles): done = False - player.reset() + env.reset() while not done: - _, _, terminated, truncated, _ = player.step(player.action_space.sample()) - done = terminated or truncated + actions = {name: env.action_space(name).sample() for name in env.agents} + _, _, terminated, truncated, _ = env.step(actions) + done = any(terminated.values()) or any(truncated.values()) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen4(): - random_player = RandomPlayer(battle_format="gen4randombattle", log_level=25) - env_player = RandomGen4EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen4EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen5(): - random_player = RandomPlayer(battle_format="gen5randombattle", log_level=25) - env_player = RandomGen5EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen5EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen6(): - random_player = RandomPlayer(battle_format="gen6randombattle", log_level=25) - env_player = RandomGen6EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen6EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen7(): - random_player = RandomPlayer(battle_format="gen7randombattle", log_level=25) - env_player = RandomGen7EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen7EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen8(): - random_player = RandomPlayer(battle_format="gen8randombattle", log_level=25) - env_player = RandomGen8EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen8EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(30) def test_random_gymnasium_players_gen9(): - random_player = RandomPlayer(battle_format="gen9randombattle", log_level=25) - env_player = RandomGen9EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen9EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(3) play_function(env_player, 3) @pytest.mark.timeout(60) def test_two_successive_calls_gen8(): - random_player = RandomPlayer(battle_format="gen8randombattle", log_level=25) - env_player = RandomGen8EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen8EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(2) play_function(env_player, 2) env_player.start_challenging(2) @@ -163,10 +142,7 @@ def test_two_successive_calls_gen8(): @pytest.mark.timeout(60) def test_two_successive_calls_gen9(): - random_player = RandomPlayer(battle_format="gen9randombattle", log_level=25) - env_player = RandomGen9EnvPlayer( - log_level=25, opponent=random_player, start_challenging=False - ) + env_player = RandomGen9EnvPlayer(log_level=25, start_challenging=False) env_player.start_challenging(2) play_function(env_player, 2) env_player.start_challenging(2) @@ -175,39 +151,21 @@ def test_two_successive_calls_gen9(): @pytest.mark.timeout(60) def test_check_envs(): - random_player = RandomPlayer(battle_format="gen4randombattle", log_level=25) - env_player_gen4 = RandomGen4EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen4) + env_player_gen4 = RandomGen4EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen4) env_player_gen4.close() - random_player = RandomPlayer(battle_format="gen5randombattle", log_level=25) - env_player_gen5 = RandomGen5EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen5) + env_player_gen5 = RandomGen5EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen5) env_player_gen5.close() - random_player = RandomPlayer(battle_format="gen6randombattle", log_level=25) - env_player_gen6 = RandomGen6EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen6) + env_player_gen6 = RandomGen6EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen6) env_player_gen6.close() - random_player = RandomPlayer(battle_format="gen7randombattle", log_level=25) - env_player_gen7 = RandomGen7EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen7) + env_player_gen7 = RandomGen7EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen7) env_player_gen7.close() - random_player = RandomPlayer(battle_format="gen8randombattle", log_level=25) - env_player_gen8 = RandomGen8EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen8) + env_player_gen8 = RandomGen8EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen8) env_player_gen8.close() - random_player = RandomPlayer(battle_format="gen9randombattle", log_level=25) - env_player_gen9 = RandomGen9EnvPlayer( - log_level=25, opponent=random_player, start_challenging=True - ) - check_env(env_player_gen9) + env_player_gen9 = RandomGen9EnvPlayer(log_level=25, start_challenging=True) + parallel_api_test(env_player_gen9) env_player_gen9.close() diff --git a/requirements.txt b/requirements.txt index 75f2fa17b..fbe09f617 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ gymnasium numpy orjson +pettingzoo requests tabulate websockets==12.0 diff --git a/src/poke_env/player/__init__.py b/src/poke_env/player/__init__.py index b364bcec8..eba6d1f9e 100644 --- a/src/poke_env/player/__init__.py +++ b/src/poke_env/player/__init__.py @@ -1,6 +1,8 @@ """poke_env.player module init. """ +from pettingzoo.utils.env import ActionType, ObsType # type: ignore[import-untyped] + from poke_env.concurrency import POKE_LOOP from poke_env.player import env_player, gymnasium_api, player, random_player, utils from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer @@ -19,7 +21,7 @@ Gen8EnvSinglePlayer, Gen9EnvSinglePlayer, ) -from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType +from poke_env.player.gymnasium_api import GymnasiumEnv from poke_env.player.player import Player from poke_env.player.random_player import RandomPlayer from poke_env.player.utils import ( @@ -36,7 +38,7 @@ "player", "random_player", "utils", - "ActType", + "ActionType", "ObsType", "EnvPlayer", "ForfeitBattleOrder", diff --git a/src/poke_env/player/env_player.py b/src/poke_env/player/env_player.py index d7526f622..2ae0a623e 100644 --- a/src/poke_env/player/env_player.py +++ b/src/poke_env/player/env_player.py @@ -1,21 +1,19 @@ """This module defines a player class exposing the Gymnasium API with utility functions. """ -from abc import ABC -from threading import Lock from typing import List, Optional, Union from weakref import WeakKeyDictionary from poke_env.environment.abstract_battle import AbstractBattle from poke_env.player.battle_order import BattleOrder, ForfeitBattleOrder -from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType +from poke_env.player.gymnasium_api import GymnasiumEnv from poke_env.player.player import Player from poke_env.ps_client.account_configuration import AccountConfiguration from poke_env.ps_client.server_configuration import ServerConfiguration from poke_env.teambuilder.teambuilder import Teambuilder -class EnvPlayer(GymnasiumEnv[ObsType, ActType], ABC): +class EnvPlayer(GymnasiumEnv): """Player exposing the Gymnasium Env API.""" _ACTION_SPACE: List[int] = [] @@ -23,8 +21,8 @@ class EnvPlayer(GymnasiumEnv[ObsType, ActType], ABC): def __init__( self, - opponent: Optional[Union[Player, str]], - account_configuration: Optional[AccountConfiguration] = None, + account_configuration1: Optional[AccountConfiguration] = None, + account_configuration2: Optional[AccountConfiguration] = None, *, avatar: Optional[int] = None, battle_format: Optional[str] = None, @@ -94,15 +92,12 @@ def __init__( self._reward_buffer: WeakKeyDictionary[AbstractBattle, float] = ( WeakKeyDictionary() ) - self._opponent_lock = Lock() - self._opponent = opponent b_format = self._DEFAULT_BATTLE_FORMAT if battle_format: b_format = battle_format - if opponent is None: - start_challenging = False super().__init__( - account_configuration=account_configuration, + account_configuration1=account_configuration1, + account_configuration2=account_configuration2, avatar=avatar, battle_format=b_format, log_level=log_level, @@ -205,28 +200,7 @@ def reward_computing_helper( def action_space_size(self) -> int: return len(self._ACTION_SPACE) - def get_opponent(self) -> Union[Player, str, List[Player], List[str]]: - with self._opponent_lock: - if self._opponent is None: - raise RuntimeError( - "Unspecified opponent. " - "Specify it in the constructor or use set_opponent" - ) - return self._opponent - - def set_opponent(self, opponent: Union[Player, str]): - """ - Sets the next opponent to the specified opponent. - - :param opponent: The next opponent to challenge - :type opponent: Player or str - """ - with self._opponent_lock: - self._opponent = opponent - - def reset_env( - self, opponent: Optional[Union[Player, str]] = None, restart: bool = True - ): + def reset_env(self, restart: bool = True): """ Resets the environment to an inactive state: it will forfeit all unfinished battles, reset the internal battle tracker and optionally change the next @@ -242,13 +216,11 @@ def reset_env( """ self.close(purge=False) self.reset_battles() - if opponent: - self.set_opponent(opponent) if restart: self.start_challenging() -class Gen4EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC): +class Gen4EnvSinglePlayer(EnvPlayer): _ACTION_SPACE = list(range(4 + 6)) _DEFAULT_BATTLE_FORMAT = "gen4randombattle" @@ -280,18 +252,18 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and action < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order(battle.available_moves[action]) + return Player.create_order(battle.available_moves[action]) elif 0 <= action - 4 < len(battle.available_switches): - return self.agent.create_order(battle.available_switches[action - 4]) + return Player.create_order(battle.available_switches[action - 4]) else: - return self.agent.choose_random_move(battle) + return Player.choose_random_move(battle) -class Gen5EnvSinglePlayer(Gen4EnvSinglePlayer[ObsType, ActType], ABC): +class Gen5EnvSinglePlayer(Gen4EnvSinglePlayer): _DEFAULT_BATTLE_FORMAT = "gen5randombattle" -class Gen6EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC): +class Gen6EnvSinglePlayer(EnvPlayer): _ACTION_SPACE = list(range(2 * 4 + 6)) _DEFAULT_BATTLE_FORMAT = "gen6randombattle" @@ -326,22 +298,20 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and action < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order(battle.available_moves[action]) + return Player.create_order(battle.available_moves[action]) elif ( battle.can_mega_evolve and 0 <= action - 4 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( - battle.available_moves[action - 4], mega=True - ) + return Player.create_order(battle.available_moves[action - 4], mega=True) elif 0 <= action - 8 < len(battle.available_switches): - return self.agent.create_order(battle.available_switches[action - 8]) + return Player.create_order(battle.available_switches[action - 8]) else: - return self.agent.choose_random_move(battle) + return Player.choose_random_move(battle) -class Gen7EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC): +class Gen7EnvSinglePlayer(EnvPlayer): _ACTION_SPACE = list(range(3 * 4 + 6)) _DEFAULT_BATTLE_FORMAT = "gen7randombattle" @@ -379,14 +349,14 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and action < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order(battle.available_moves[action]) + return Player.create_order(battle.available_moves[action]) elif ( not battle.force_switch and battle.can_z_move and battle.active_pokemon and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves) ): - return self.agent.create_order( + return Player.create_order( battle.active_pokemon.available_z_moves[action - 4], z_move=True ) elif ( @@ -394,16 +364,14 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and 0 <= action - 8 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( - battle.available_moves[action - 8], mega=True - ) + return Player.create_order(battle.available_moves[action - 8], mega=True) elif 0 <= action - 12 < len(battle.available_switches): - return self.agent.create_order(battle.available_switches[action - 12]) + return Player.create_order(battle.available_switches[action - 12]) else: - return self.agent.choose_random_move(battle) + return Player.choose_random_move(battle) -class Gen8EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC): +class Gen8EnvSinglePlayer(EnvPlayer): _ACTION_SPACE = list(range(4 * 4 + 6)) _DEFAULT_BATTLE_FORMAT = "gen8randombattle" @@ -447,14 +415,14 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and action < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order(battle.available_moves[action]) + return Player.create_order(battle.available_moves[action]) elif ( not battle.force_switch and battle.can_z_move and battle.active_pokemon and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves) ): - return self.agent.create_order( + return Player.create_order( battle.active_pokemon.available_z_moves[action - 4], z_move=True ) elif ( @@ -462,24 +430,22 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and 0 <= action - 8 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( - battle.available_moves[action - 8], mega=True - ) + return Player.create_order(battle.available_moves[action - 8], mega=True) elif ( battle.can_dynamax and 0 <= action - 12 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( + return Player.create_order( battle.available_moves[action - 12], dynamax=True ) elif 0 <= action - 16 < len(battle.available_switches): - return self.agent.create_order(battle.available_switches[action - 16]) + return Player.create_order(battle.available_switches[action - 16]) else: - return self.agent.choose_random_move(battle) + return Player.choose_random_move(battle) -class Gen9EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC): +class Gen9EnvSinglePlayer(EnvPlayer): _ACTION_SPACE = list(range(5 * 4 + 6)) _DEFAULT_BATTLE_FORMAT = "gen9randombattle" @@ -526,14 +492,14 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and action < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order(battle.available_moves[action]) + return Player.create_order(battle.available_moves[action]) elif ( not battle.force_switch and battle.can_z_move and battle.active_pokemon and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves) ): - return self.agent.create_order( + return Player.create_order( battle.active_pokemon.available_z_moves[action - 4], z_move=True ) elif ( @@ -541,15 +507,13 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and 0 <= action - 8 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( - battle.available_moves[action - 8], mega=True - ) + return Player.create_order(battle.available_moves[action - 8], mega=True) elif ( battle.can_dynamax and 0 <= action - 12 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( + return Player.create_order( battle.available_moves[action - 12], dynamax=True ) elif ( @@ -557,10 +521,10 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: and 0 <= action - 16 < len(battle.available_moves) and not battle.force_switch ): - return self.agent.create_order( + return Player.create_order( battle.available_moves[action - 16], terastallize=True ) elif 0 <= action - 20 < len(battle.available_switches): - return self.agent.create_order(battle.available_switches[action - 20]) + return Player.create_order(battle.available_switches[action - 20]) else: - return self.agent.choose_random_move(battle) + return Player.choose_random_move(battle) diff --git a/src/poke_env/player/gymnasium_api.py b/src/poke_env/player/gymnasium_api.py index 7be39f6b3..3592ffa09 100644 --- a/src/poke_env/player/gymnasium_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -6,14 +6,16 @@ import asyncio import copy -import random import time -from abc import ABC, abstractmethod -from logging import Logger -from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, Tuple, Union +from abc import abstractmethod +from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple, Union -from gymnasium.core import ActType, Env, ObsType from gymnasium.spaces import Discrete, Space +from pettingzoo.utils.env import ( # type: ignore[import-untyped] + ActionType, + ObsType, + ParallelEnv, +) from poke_env.concurrency import POKE_LOOP, create_in_poke_loop from poke_env.environment.abstract_battle import AbstractBattle @@ -34,9 +36,14 @@ def __init__(self, queue: asyncio.Queue[Any]): async def async_get(self): return await self.queue.get() - def get(self): - res = asyncio.run_coroutine_threadsafe(self.queue.get(), POKE_LOOP) - return res.result() + def get(self, timeout: Optional[float] = None, default: Any = None): + try: + res = asyncio.run_coroutine_threadsafe( + asyncio.wait_for(self.async_get(), timeout), POKE_LOOP + ) + return res.result() + except asyncio.TimeoutError: + return default async def async_put(self, item: Any): await self.queue.put(item) @@ -56,13 +63,13 @@ async def async_join(self): await self.queue.join() -class _AsyncPlayer(Generic[ObsType, ActType], Player): +class _AsyncPlayer(Player): actions: _AsyncQueue observations: _AsyncQueue def __init__( self, - user_funcs: GymnasiumEnv[ObsType, ActType], + user_funcs: GymnasiumEnv, username: str, **kwargs: Any, ): @@ -72,6 +79,7 @@ def __init__( self.observations = _AsyncQueue(create_in_poke_loop(asyncio.Queue, 1)) self.actions = _AsyncQueue(create_in_poke_loop(asyncio.Queue, 1)) self.current_battle: Optional[AbstractBattle] = None + self.waiting = False self._user_funcs = user_funcs def choose_move(self, battle: AbstractBattle) -> Awaitable[BattleOrder]: @@ -84,7 +92,9 @@ async def _env_move(self, battle: AbstractBattle) -> BattleOrder: raise RuntimeError("Using different battles for queues") battle_to_send = self._user_funcs.embed_battle(battle) await self.observations.async_put(battle_to_send) + self.waiting = True action = await self.actions.async_get() + self.waiting = False if action == -1: return ForfeitBattleOrder() return self._user_funcs.action_to_move(action, battle) @@ -94,10 +104,7 @@ def _battle_finished_callback(self, battle: AbstractBattle): asyncio.run_coroutine_threadsafe(self.observations.async_put(to_put), POKE_LOOP) -class GymnasiumEnv( - Env[ObsType, ActType], - ABC, -): +class GymnasiumEnv(ParallelEnv[str, ObsType, ActionType]): """ Base class implementing the Gymnasium API on the main thread. """ @@ -109,7 +116,8 @@ class GymnasiumEnv( def __init__( self, - account_configuration: Optional[AccountConfiguration] = None, + account_configuration1: Optional[AccountConfiguration] = None, + account_configuration2: Optional[AccountConfiguration] = None, *, avatar: Optional[int] = None, battle_format: str = "gen8randombattle", @@ -176,10 +184,10 @@ def __init__( leave it inactive. :type start_challenging: bool """ - self.agent = _AsyncPlayer( + self.agent1 = _AsyncPlayer( self, username=self.__class__.__name__, # type: ignore - account_configuration=account_configuration, + account_configuration=account_configuration1, avatar=avatar, battle_format=battle_format, log_level=log_level, @@ -194,12 +202,39 @@ def __init__( ping_timeout=ping_timeout, team=team, ) - self._actions = self.agent.actions - self._observations = self.agent.observations - self.action_space = Discrete(self.action_space_size()) # type: ignore - self.observation_space = self.describe_embedding() - self.current_battle: Optional[AbstractBattle] = None - self.last_battle: Optional[AbstractBattle] = None + self.agent2 = _AsyncPlayer( + self, + username=self.__class__.__name__, # type: ignore + account_configuration=account_configuration2, + avatar=avatar, + battle_format=battle_format, + log_level=log_level, + max_concurrent_battles=1, + save_replays=save_replays, + server_configuration=server_configuration, + accept_open_team_sheet=accept_open_team_sheet, + start_timer_on_battle_start=start_timer_on_battle_start, + start_listening=start_listening, + ping_interval=ping_interval, + ping_timeout=ping_timeout, + team=team, + ) + self.agents: List[str] = [] + self.possible_agents = [self.agent1.username, self.agent2.username] + self.observation_spaces = { + name: self.describe_embedding() for name in self.possible_agents + } + self.action_spaces = { + name: Discrete(self.action_space_size()) for name in self.possible_agents + } + self._actions1 = self.agent1.actions + self._observations1 = self.agent1.observations + self._actions2 = self.agent2.actions + self._observations2 = self.agent2.observations + self.current_battle1: Optional[AbstractBattle] = None + self.current_battle2: Optional[AbstractBattle] = None + self.last_battle1: Optional[AbstractBattle] = None + self.last_battle2: Optional[AbstractBattle] = None self._keep_challenging: bool = False self._challenge_task = None self._seed_initialized: bool = False @@ -277,62 +312,45 @@ def action_space_size(self) -> int: """ pass - @abstractmethod - def get_opponent( - self, - ) -> Union[Player, str, List[Player], List[str]]: - """ - Returns the opponent (or list of opponents) that will be challenged - on the next iteration of the challenge loop. If a list is returned, - a random element will be chosen at random during the challenge loop. - - :return: The opponent (or list of opponents). - :rtype: Player or str or list(Player) or list(str) - """ - pass - - def _get_opponent(self) -> Union[Player, str]: - opponent = self.get_opponent() - random_opponent = ( - random.choice(opponent) if isinstance(opponent, list) else opponent - ) - return random_opponent # type: ignore - def reset( self, - *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None, - ) -> Tuple[ObsType, Dict[str, Any]]: - if seed is not None: - super().reset(seed=seed) # type: ignore - self._seed_initialized = True - elif not self._seed_initialized: - super().reset(seed=int(time.time())) # type: ignore - self._seed_initialized = True - if not self.agent.current_battle: + ) -> Tuple[Dict[str, ObsType], Dict[str, Dict[str, Any]]]: + self.agents = [self.agent1.username, self.agent2.username] + # TODO: use the seed + if not self.agent1.current_battle or not self.agent2.current_battle: count = self._INIT_RETRIES - while not self.agent.current_battle: + while not self.agent1.current_battle or not self.agent2.current_battle: if count == 0: raise RuntimeError("Agent is not challenging") count -= 1 time.sleep(self._TIME_BETWEEN_RETRIES) - if self.current_battle and not self.current_battle.finished: - if self.current_battle == self.agent.current_battle: - self._actions.put(-1) - self._observations.get() + if self.current_battle1 and not self.current_battle1.finished: + if self.current_battle1 == self.agent1.current_battle: + self._actions1.put(-1) + self._actions2.put(0) + self._observations1.get() + self._observations2.get() else: raise RuntimeError( "Environment and agent aren't synchronized. Try to restart" ) - while self.current_battle == self.agent.current_battle: + while self.current_battle1 == self.agent1.current_battle: time.sleep(0.01) - self.current_battle = self.agent.current_battle - self.current_battle.logger = None - self.last_battle = self.current_battle - return self._observations.get(), self.get_additional_info() - - def get_additional_info(self) -> Dict[str, Any]: + observations = { + self.agents[0]: self._observations1.get(), + self.agents[1]: self._observations2.get(), + } + self.current_battle1 = self.agent1.current_battle + self.current_battle1.logger = None + self.current_battle2 = self.agent2.current_battle + self.current_battle2.logger = None + self.last_battle1 = self.current_battle1 + self.last_battle2 = self.current_battle2 + return observations, self.get_additional_info() + + def get_additional_info(self) -> Dict[str, Dict[str, Any]]: """ Returns additional info for the reset method. Override only if you really need it. @@ -340,86 +358,115 @@ def get_additional_info(self) -> Dict[str, Any]: :return: Additional information as a Dict :rtype: Dict """ - return {} - - def step( - self, action: ActType - ) -> Tuple[ObsType, float, bool, bool, Dict[str, Any]]: - """ - Execute the specified action in the environment. - - :param ActType action: The action to be executed. - :return: A tuple containing the new observation, reward, termination flag, truncation flag, and info dictionary. - :rtype: Tuple[ObsType, float, bool, bool, Dict[str, Any]] - """ - if not self.current_battle: - obs, info = self.reset() - return obs, 0.0, False, False, info - if self.current_battle.finished: + return {self.possible_agents[0]: {}, self.possible_agents[1]: {}} + + def step(self, actions: Dict[str, ActionType]) -> Tuple[ + Dict[str, ObsType], + Dict[str, float], + Dict[str, bool], + Dict[str, bool], + Dict[str, Dict[str, Any]], + ]: + assert self.current_battle1 is not None + assert self.current_battle2 is not None + if self.current_battle1.finished: raise RuntimeError("Battle is already finished, call reset") - battle = copy.copy(self.current_battle) - battle.logger = None - self.last_battle = battle - self._actions.put(action) - observation = self._observations.get() - reward = self.calc_reward(self.last_battle, self.current_battle) + battle1 = copy.copy(self.current_battle1) + battle1.logger = None + battle2 = copy.copy(self.current_battle2) + battle2.logger = None + self.last_battle1 = battle1 + self.last_battle2 = battle2 + if self.agent1.waiting: + self._actions1.put(actions[self.agents[0]]) + if self.agent2.waiting: + self._actions2.put(actions[self.agents[1]]) + observations = { + self.agents[0]: self._observations1.get( + timeout=0.1, default=self.embed_battle(self.last_battle1) + ), + self.agents[1]: self._observations2.get( + timeout=0.1, default=self.embed_battle(self.last_battle2) + ), + } + assert self.current_battle1 == self.agent1.current_battle + reward = { + self.agents[0]: self.calc_reward(self.last_battle1, self.current_battle1), + self.agents[1]: self.calc_reward(self.last_battle2, self.current_battle2), + } + term1, trunc1 = self.calc_term_trunc(self.current_battle1) + term2, trunc2 = self.calc_term_trunc(self.current_battle2) + terminated = {self.agents[0]: term1, self.agents[1]: term2} + truncated = {self.agents[0]: trunc1, self.agents[1]: trunc2} + if self.current_battle1.finished: + self.agents = [] + return observations, reward, terminated, truncated, self.get_additional_info() + + @staticmethod + def calc_term_trunc(battle: AbstractBattle): terminated = False truncated = False - if self.current_battle.finished: - size = self.current_battle.team_size + if battle.finished: + size = battle.team_size remaining_mons = size - len( - [mon for mon in self.current_battle.team.values() if mon.fainted] + [mon for mon in battle.team.values() if mon.fainted] ) remaining_opponent_mons = size - len( - [ - mon - for mon in self.current_battle.opponent_team.values() - if mon.fainted - ] + [mon for mon in battle.opponent_team.values() if mon.fainted] ) if (remaining_mons == 0) != (remaining_opponent_mons == 0): terminated = True else: truncated = True - return observation, reward, terminated, truncated, self.get_additional_info() + return terminated, truncated def render(self, mode: str = "human"): - if self.current_battle is not None: + if self.current_battle1 is not None: print( " Turn %4d. | [%s][%3d/%3dhp] %10.10s - %10.10s [%3d%%hp][%s]" % ( - self.current_battle.turn, + self.current_battle1.turn, "".join( [ "⦻" if mon.fainted else "●" - for mon in self.current_battle.team.values() + for mon in self.current_battle1.team.values() ] ), - self.current_battle.active_pokemon.current_hp or 0, - self.current_battle.active_pokemon.max_hp or 0, - self.current_battle.active_pokemon.species, - self.current_battle.opponent_active_pokemon.species, - self.current_battle.opponent_active_pokemon.current_hp or 0, + self.current_battle1.active_pokemon.current_hp or 0, + self.current_battle1.active_pokemon.max_hp or 0, + self.current_battle1.active_pokemon.species, + self.current_battle1.opponent_active_pokemon.species, + self.current_battle1.opponent_active_pokemon.current_hp or 0, "".join( [ "⦻" if mon.fainted else "●" - for mon in self.current_battle.opponent_team.values() + for mon in self.current_battle1.opponent_team.values() ] ), ), - end="\n" if self.current_battle.finished else "\r", + end="\n" if self.current_battle1.finished else "\r", ) def close(self, purge: bool = True): - if self.current_battle is None or self.current_battle.finished: + if self.current_battle1 is None or self.current_battle1.finished: + time.sleep(1) + if self.current_battle1 != self.agent1.current_battle: + self.current_battle1 = self.agent1.current_battle + if self.current_battle2 is None or self.current_battle2.finished: time.sleep(1) - if self.current_battle != self.agent.current_battle: - self.current_battle = self.agent.current_battle + if self.current_battle2 != self.agent2.current_battle: + self.current_battle2 = self.agent2.current_battle closing_task = asyncio.run_coroutine_threadsafe( self._stop_challenge_loop(purge=purge), POKE_LOOP ) closing_task.result() + def observation_space(self, agent: str) -> Space: + return self.observation_spaces[agent] + + def action_space(self, agent: str): + return self.action_spaces[agent] + def background_send_challenge(self, username: str): """ Sends a single challenge specified player. The function immediately returns @@ -435,7 +482,7 @@ def background_send_challenge(self, username: str): "'await agent.stop_challenge_loop()' to clear the task." ) self._challenge_task = asyncio.run_coroutine_threadsafe( - self.agent.send_challenges(username, 1), POKE_LOOP + self.agent1.send_challenges(username, 1), POKE_LOOP ) def background_accept_challenge(self, username: str): @@ -453,7 +500,7 @@ def background_accept_challenge(self, username: str): "'await agent.stop_challenge_loop()' to clear the task." ) self._challenge_task = asyncio.run_coroutine_threadsafe( - self.agent.accept_challenges(username, 1, self.agent.next_team), POKE_LOOP + self.agent1.accept_challenges(username, 1, self.agent1.next_team), POKE_LOOP ) async def _challenge_loop( @@ -463,22 +510,14 @@ async def _challenge_loop( ): if not n_challenges: while self._keep_challenging: - opponent = self._get_opponent() - if isinstance(opponent, Player): - await self.agent.battle_against(opponent, n_battles=1) - else: - await self.agent.send_challenges(opponent, 1) - if callback and self.current_battle is not None: - callback(copy.deepcopy(self.current_battle)) + await self.agent1.battle_against(self.agent2, n_battles=1) + if callback and self.current_battle1 is not None: + callback(copy.deepcopy(self.current_battle1)) elif n_challenges > 0: for _ in range(n_challenges): - opponent = self._get_opponent() - if isinstance(opponent, Player): - await self.agent.battle_against(opponent, n_battles=1) - else: - await self.agent.send_challenges(opponent, 1) - if callback and self.current_battle is not None: - callback(copy.deepcopy(self.current_battle)) + await self.agent1.battle_against(self.agent2, n_battles=1) + if callback and self.current_battle1 is not None: + callback(copy.deepcopy(self.current_battle1)) else: raise ValueError(f"Number of challenges must be > 0. Got {n_challenges}") @@ -521,14 +560,14 @@ async def _ladder_loop( f"Number of challenges must be > 0. Got {n_challenges}" ) for _ in range(n_challenges): - await self.agent.ladder(1) - if callback and self.current_battle is not None: - callback(self.current_battle) + await self.agent1.ladder(1) + if callback and self.current_battle1 is not None: + callback(self.current_battle1) else: while self._keep_challenging: - await self.agent.ladder(1) - if callback and self.current_battle is not None: - callback(self.current_battle) + await self.agent1.ladder(1) + if callback and self.current_battle1 is not None: + callback(self.current_battle1) def start_laddering( self, @@ -564,18 +603,21 @@ async def _stop_challenge_loop( self._keep_challenging = False if force: - if self.current_battle and not self.current_battle.finished: - if not self._actions.empty(): + if self.current_battle1 and not self.current_battle1.finished: + if not (self._actions1.empty() and self._actions2.empty()): await asyncio.sleep(2) - if not self._actions.empty(): + if not (self._actions1.empty() and self._actions2.empty()): raise RuntimeError( "The agent is still sending actions. " "Use this method only when training or " "evaluation are over." ) - if not self._observations.empty(): - await self._observations.async_get() - await self._actions.async_put(-1) + if not self._observations1.empty(): + await self._observations1.async_get() + if not self._observations2.empty(): + await self._observations2.async_get() + await self._actions1.async_put(-1) + await self._actions2.async_put(0) if wait and self._challenge_task: while not self._challenge_task.done(): @@ -583,19 +625,26 @@ async def _stop_challenge_loop( self._challenge_task.result() self._challenge_task = None - self.current_battle = None - self.agent.current_battle = None - while not self._actions.empty(): - await self._actions.async_get() - while not self._observations.empty(): - await self._observations.async_get() + self.current_battle1 = None + self.current_battle2 = None + self.agent1.current_battle = None + self.agent2.current_battle = None + while not self._actions1.empty(): + await self._actions1.async_get() + while not self._actions2.empty(): + await self._actions2.async_get() + while not self._observations1.empty(): + await self._observations1.async_get() + while not self._observations2.empty(): + await self._observations2.async_get() if purge: - self.agent.reset_battles() + self.reset_battles() def reset_battles(self): """Resets the player's inner battle tracker.""" - self.agent.reset_battles() + self.agent1.reset_battles() + self.agent2.reset_battles() def done(self, timeout: Optional[int] = None) -> bool: """ @@ -618,80 +667,3 @@ def done(self, timeout: Optional[int] = None) -> bool: return True time.sleep(timeout) return self._challenge_task.done() - - # Expose properties of Player class - - @property - def battles(self) -> Dict[str, AbstractBattle]: - return self.agent.battles - - @property - def format(self) -> str: - return self.agent.format - - @property - def format_is_doubles(self) -> bool: - return self.agent.format_is_doubles - - @property - def n_finished_battles(self) -> int: - return self.agent.n_finished_battles - - @property - def n_lost_battles(self) -> int: - return self.agent.n_lost_battles - - @property - def n_tied_battles(self) -> int: - return self.agent.n_tied_battles - - @property - def n_won_battles(self) -> int: - return self.agent.n_won_battles - - @property - def win_rate(self) -> float: - return self.agent.win_rate - - # Expose properties of Player Network Interface Class - - @property - def logged_in(self) -> asyncio.Event: - """Event object associated with user login. - - :return: The logged-in event - :rtype: Event - """ - return self.agent.ps_client.logged_in - - @property - def logger(self) -> Logger: - """Logger associated with the player. - - :return: The logger. - :rtype: Logger - """ - return self.agent.logger - - @property - def username(self) -> str: - """The player's username. - - :return: The player's username. - :rtype: str - """ - return self.agent.username - - @property - def websocket_url(self) -> str: - """The websocket url. - - It is derived from the server url. - - :return: The websocket url. - :rtype: str - """ - return self.agent.ps_client.websocket_url - - def __getattr__(self, item: str): - return getattr(self.agent, item) diff --git a/unit_tests/player/test_env_player.py b/unit_tests/player/test_env_player.py index 56b77b1f4..d92c547df 100644 --- a/unit_tests/player/test_env_player.py +++ b/unit_tests/player/test_env_player.py @@ -3,7 +3,6 @@ from inspect import isawaitable from unittest.mock import patch -import pytest from gymnasium.spaces import Discrete, Space from poke_env import AccountConfiguration, ServerConfiguration @@ -17,11 +16,11 @@ Gen7EnvSinglePlayer, Gen8EnvSinglePlayer, Gen9EnvSinglePlayer, - RandomPlayer, ) from poke_env.player.gymnasium_api import _AsyncPlayer -account_configuration = AccountConfiguration("username", "password") +account_configuration1 = AccountConfiguration("username1", "password1") +account_configuration2 = AccountConfiguration("username2", "password2") server_configuration = ServerConfiguration("server.url", "auth.url") @@ -43,13 +42,13 @@ def embed_battle(self, battle): def test_init(): gymnasium_env = CustomEnvPlayer( - None, - account_configuration=account_configuration, + account_configuration1=account_configuration1, + account_configuration2=account_configuration2, server_configuration=server_configuration, start_listening=False, battle_format="gen7randombattles", ) - player = gymnasium_env.agent + player = gymnasium_env.agent1 assert isinstance(gymnasium_env, CustomEnvPlayer) assert isinstance(player, _AsyncPlayer) @@ -67,16 +66,17 @@ async def __call__(self, *args, **kwargs): @patch("poke_env.player.gymnasium_api._AsyncQueue.async_put", new_callable=AsyncMock) def test_choose_move(queue_put_mock, queue_get_mock): player = CustomEnvPlayer( - None, - account_configuration=account_configuration, + account_configuration1=account_configuration1, + account_configuration2=account_configuration2, server_configuration=server_configuration, start_listening=False, battle_format="gen7randombattles", start_challenging=False, ) - battle = Battle("bat1", player.username, player.logger, gen=8) - battle._available_moves = {Move("flamethrower", gen=8)} - message = player.agent.choose_move(battle) + battle = Battle("bat1", player.agent1.username, player.agent1.logger, gen=8) + battle._available_moves = [Move("flamethrower", gen=8)] + message = player.agent1.choose_move(battle) + player.agent2.choose_move(battle) assert isawaitable(message) @@ -84,9 +84,11 @@ def test_choose_move(queue_put_mock, queue_get_mock): assert message.message == "/choose move flamethrower" - battle._available_moves = {Pokemon(species="charizard", gen=8)} + battle._available_moves = [] + battle._available_switches = [Pokemon(species="charizard", gen=8)] - message = player.agent.choose_move(battle) + message = player.agent1.choose_move(battle) + player.agent2.choose_move(battle) assert isawaitable(message) @@ -97,16 +99,16 @@ def test_choose_move(queue_put_mock, queue_get_mock): def test_reward_computing_helper(): player = CustomEnvPlayer( - None, - account_configuration=account_configuration, + account_configuration1=account_configuration1, + account_configuration2=account_configuration2, server_configuration=server_configuration, start_listening=False, battle_format="gen7randombattles", ) - battle_1 = Battle("bat1", player.username, player.logger, gen=8) - battle_2 = Battle("bat2", player.username, player.logger, gen=8) - battle_3 = Battle("bat3", player.username, player.logger, gen=8) - battle_4 = Battle("bat4", player.username, player.logger, gen=8) + battle_1 = Battle("bat1", player.agent1.username, player.agent1.logger, gen=8) + battle_2 = Battle("bat2", player.agent1.username, player.agent1.logger, gen=8) + battle_3 = Battle("bat3", player.agent1.username, player.agent1.logger, gen=8) + battle_4 = Battle("bat4", player.agent1.username, player.agent1.logger, gen=8) assert ( player.reward_computing_helper( @@ -219,8 +221,10 @@ def test_reward_computing_helper(): def test_action_space(): - player = CustomEnvPlayer(None, start_listening=False) - assert player.action_space == Discrete(len(Gen7EnvSinglePlayer._ACTION_SPACE)) + player = CustomEnvPlayer(start_listening=False) + assert player.action_space(player.possible_agents[0]) == Discrete( + len(Gen7EnvSinglePlayer._ACTION_SPACE) + ) for PlayerClass, (has_megas, has_z_moves, has_dynamax) in zip( [ @@ -249,32 +253,13 @@ def calc_reward(self, last_battle, current_battle): def describe_embedding(self): return None - def get_opponent(self): - return None - - p = CustomEnvClass(None, start_listening=False, start_challenging=False) + p = CustomEnvClass(start_listening=False, start_challenging=False) - assert p.action_space == Discrete( + assert p.action_space(p.possible_agents[0]) == Discrete( 4 * sum([1, has_megas, has_z_moves, has_dynamax]) + 6 ) -def test_get_opponent(): - player = CustomEnvPlayer(start_listening=False, opponent="test") - assert player.get_opponent() == "test" - player._opponent = None - with pytest.raises(RuntimeError): - player.get_opponent() - - -def test_set_opponent(): - player = CustomEnvPlayer(None, start_listening=False) - assert player._opponent is None - dummy_player = RandomPlayer() - player.set_opponent(dummy_player) - assert player._opponent == dummy_player - - @patch( "poke_env.environment.Pokemon.available_z_moves", new_callable=unittest.mock.PropertyMock, @@ -312,8 +297,8 @@ def describe_embedding(self): def get_opponent(self): return None - p = CustomEnvClass(None, start_listening=False, start_challenging=False) - battle = Battle("bat1", p.username, p.logger, gen=8) + p = CustomEnvClass(start_listening=False, start_challenging=False) + battle = Battle("bat1", p.agent1.username, p.agent1.logger, gen=8) assert p.action_to_move(-1, battle).message == "/forfeit" battle._available_moves = [Move("flamethrower", gen=8)] assert p.action_to_move(0, battle).message == "/choose move flamethrower" diff --git a/unit_tests/player/test_gymnasium.py b/unit_tests/player/test_gymnasium.py index e40c0fd7d..8b0895f0b 100644 --- a/unit_tests/player/test_gymnasium.py +++ b/unit_tests/player/test_gymnasium.py @@ -1,23 +1,16 @@ import asyncio import sys from io import StringIO -from typing import Union from gymnasium import Space +from pettingzoo.utils.env import ActionType, ObsType from poke_env.environment import AbstractBattle, Battle, Pokemon -from poke_env.player import ( - ActType, - BattleOrder, - ForfeitBattleOrder, - GymnasiumEnv, - ObsType, - Player, -) +from poke_env.player import BattleOrder, ForfeitBattleOrder, GymnasiumEnv from poke_env.player.gymnasium_api import _AsyncPlayer, _AsyncQueue -class DummyEnv(GymnasiumEnv[ObsType, ActType]): +class DummyEnv(GymnasiumEnv[ObsType, ActionType]): def __init__(self, *args, **kwargs): self.opponent = None super().__init__(*args, **kwargs) @@ -39,9 +32,6 @@ def describe_embedding(self) -> Space: def action_space_size(self) -> int: return 1 - def get_opponent(self) -> Union[Player, str]: - return self.opponent - class UserFuncs: def embed_battle(self, battle): @@ -85,7 +75,7 @@ def render(battle): player = DummyEnv(start_listening=False) captured_output = StringIO() sys.stdout = captured_output - player.current_battle = battle + player.current_battle1 = battle player.render() sys.stdout = sys.__stdout__ return captured_output.getvalue() @@ -113,17 +103,3 @@ def test_render(): battle._team["2"] = other_mon expected = " Turn 3. | [●●][ 60/120hp] charizard - pikachu [ 20%hp][●]\r" assert render(battle) == expected - - -def test_get_opponent(): - player = DummyEnv(start_listening=False) - assert player._get_opponent() is None - player.opponent = "test" - assert player._get_opponent() == "test" - player.opponent = ["test"] - assert player._get_opponent() == "test" - opponents = ["test1", "test2", "test3", "test4", "test5"] - player.opponent = opponents - for _ in range(100): - assert player._get_opponent() in opponents - player.opponent = [0] From 8b2dc9a1b2ec96bc1f64b4e201a636eacc5cf083 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Fri, 3 Jan 2025 18:55:43 -0500 Subject: [PATCH 8/9] Add focus punch to _FROM_DATA (#670) --- src/poke_env/environment/effect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/poke_env/environment/effect.py b/src/poke_env/environment/effect.py index 711b3327b..8d0a2c5b2 100644 --- a/src/poke_env/environment/effect.py +++ b/src/poke_env/environment/effect.py @@ -828,6 +828,7 @@ def is_from_move(self) -> bool: "FLOWERVEIL": Effect.FLOWER_VEIL, "FOCUSBAND": Effect.FOCUS_BAND, "FOCUSENERGY": Effect.FOCUS_ENERGY, + "FOCUSPUNCH": Effect.FOCUS_PUNCH, "FOLLOWME": Effect.FOLLOW_ME, "FORESIGHT": Effect.FORESIGHT, "FOREWARN": Effect.FOREWARN, From ea4b5f33a9f64852eaa135ce6e93a8633e17b6a8 Mon Sep 17 00:00:00 2001 From: Cameron Angliss <50247523+cameronangliss@users.noreply.github.com> Date: Fri, 3 Jan 2025 19:34:00 -0500 Subject: [PATCH 9/9] organize (#669) --- src/poke_env/player/gymnasium_api.py | 262 ++++++++++++++------------- 1 file changed, 136 insertions(+), 126 deletions(-) diff --git a/src/poke_env/player/gymnasium_api.py b/src/poke_env/player/gymnasium_api.py index 3592ffa09..555de609a 100644 --- a/src/poke_env/player/gymnasium_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -244,6 +244,140 @@ def __init__( self._challenge_loop(), POKE_LOOP ) + ################################################################################### + # PettingZoo API + # https://pettingzoo.farama.org/api/parallel/#parallelenv + + def step(self, actions: Dict[str, ActionType]) -> Tuple[ + Dict[str, ObsType], + Dict[str, float], + Dict[str, bool], + Dict[str, bool], + Dict[str, Dict[str, Any]], + ]: + assert self.current_battle1 is not None + assert self.current_battle2 is not None + if self.current_battle1.finished: + raise RuntimeError("Battle is already finished, call reset") + battle1 = copy.copy(self.current_battle1) + battle1.logger = None + battle2 = copy.copy(self.current_battle2) + battle2.logger = None + self.last_battle1 = battle1 + self.last_battle2 = battle2 + if self.agent1.waiting: + self._actions1.put(actions[self.agents[0]]) + if self.agent2.waiting: + self._actions2.put(actions[self.agents[1]]) + observations = { + self.agents[0]: self._observations1.get( + timeout=0.1, default=self.embed_battle(self.last_battle1) + ), + self.agents[1]: self._observations2.get( + timeout=0.1, default=self.embed_battle(self.last_battle2) + ), + } + assert self.current_battle1 == self.agent1.current_battle + reward = { + self.agents[0]: self.calc_reward(self.last_battle1, self.current_battle1), + self.agents[1]: self.calc_reward(self.last_battle2, self.current_battle2), + } + term1, trunc1 = self.calc_term_trunc(self.current_battle1) + term2, trunc2 = self.calc_term_trunc(self.current_battle2) + terminated = {self.agents[0]: term1, self.agents[1]: term2} + truncated = {self.agents[0]: trunc1, self.agents[1]: trunc2} + if self.current_battle1.finished: + self.agents = [] + return observations, reward, terminated, truncated, self.get_additional_info() + + def reset( + self, + seed: Optional[int] = None, + options: Optional[Dict[str, Any]] = None, + ) -> Tuple[Dict[str, ObsType], Dict[str, Dict[str, Any]]]: + self.agents = [self.agent1.username, self.agent2.username] + # TODO: use the seed + if not self.agent1.current_battle or not self.agent2.current_battle: + count = self._INIT_RETRIES + while not self.agent1.current_battle or not self.agent2.current_battle: + if count == 0: + raise RuntimeError("Agent is not challenging") + count -= 1 + time.sleep(self._TIME_BETWEEN_RETRIES) + if self.current_battle1 and not self.current_battle1.finished: + if self.current_battle1 == self.agent1.current_battle: + self._actions1.put(-1) + self._actions2.put(0) + self._observations1.get() + self._observations2.get() + else: + raise RuntimeError( + "Environment and agent aren't synchronized. Try to restart" + ) + while self.current_battle1 == self.agent1.current_battle: + time.sleep(0.01) + observations = { + self.agents[0]: self._observations1.get(), + self.agents[1]: self._observations2.get(), + } + self.current_battle1 = self.agent1.current_battle + self.current_battle1.logger = None + self.current_battle2 = self.agent2.current_battle + self.current_battle2.logger = None + self.last_battle1 = self.current_battle1 + self.last_battle2 = self.current_battle2 + return observations, self.get_additional_info() + + def render(self, mode: str = "human"): + if self.current_battle1 is not None: + print( + " Turn %4d. | [%s][%3d/%3dhp] %10.10s - %10.10s [%3d%%hp][%s]" + % ( + self.current_battle1.turn, + "".join( + [ + "⦻" if mon.fainted else "●" + for mon in self.current_battle1.team.values() + ] + ), + self.current_battle1.active_pokemon.current_hp or 0, + self.current_battle1.active_pokemon.max_hp or 0, + self.current_battle1.active_pokemon.species, + self.current_battle1.opponent_active_pokemon.species, + self.current_battle1.opponent_active_pokemon.current_hp or 0, + "".join( + [ + "⦻" if mon.fainted else "●" + for mon in self.current_battle1.opponent_team.values() + ] + ), + ), + end="\n" if self.current_battle1.finished else "\r", + ) + + def close(self, purge: bool = True): + if self.current_battle1 is None or self.current_battle1.finished: + time.sleep(1) + if self.current_battle1 != self.agent1.current_battle: + self.current_battle1 = self.agent1.current_battle + if self.current_battle2 is None or self.current_battle2.finished: + time.sleep(1) + if self.current_battle2 != self.agent2.current_battle: + self.current_battle2 = self.agent2.current_battle + closing_task = asyncio.run_coroutine_threadsafe( + self._stop_challenge_loop(purge=purge), POKE_LOOP + ) + closing_task.result() + + def observation_space(self, agent: str) -> Space: + return self.observation_spaces[agent] + + def action_space(self, agent: str): + return self.action_spaces[agent] + + ################################################################################### + # Abstract methods + @abstractmethod def calc_reward( self, last_battle: AbstractBattle, current_battle: AbstractBattle @@ -312,43 +446,8 @@ def action_space_size(self) -> int: """ pass - def reset( - self, - seed: Optional[int] = None, - options: Optional[Dict[str, Any]] = None, - ) -> Tuple[Dict[str, ObsType], Dict[str, Dict[str, Any]]]: - self.agents = [self.agent1.username, self.agent2.username] - # TODO: use the seed - if not self.agent1.current_battle or not self.agent2.current_battle: - count = self._INIT_RETRIES - while not self.agent1.current_battle or not self.agent2.current_battle: - if count == 0: - raise RuntimeError("Agent is not challenging") - count -= 1 - time.sleep(self._TIME_BETWEEN_RETRIES) - if self.current_battle1 and not self.current_battle1.finished: - if self.current_battle1 == self.agent1.current_battle: - self._actions1.put(-1) - self._actions2.put(0) - self._observations1.get() - self._observations2.get() - else: - raise RuntimeError( - "Environment and agent aren't synchronized. Try to restart" - ) - while self.current_battle1 == self.agent1.current_battle: - time.sleep(0.01) - observations = { - self.agents[0]: self._observations1.get(), - self.agents[1]: self._observations2.get(), - } - self.current_battle1 = self.agent1.current_battle - self.current_battle1.logger = None - self.current_battle2 = self.agent2.current_battle - self.current_battle2.logger = None - self.last_battle1 = self.current_battle1 - self.last_battle2 = self.current_battle2 - return observations, self.get_additional_info() + ################################################################################### + # Helper methods def get_additional_info(self) -> Dict[str, Dict[str, Any]]: """ @@ -360,48 +459,6 @@ def get_additional_info(self) -> Dict[str, Dict[str, Any]]: """ return {self.possible_agents[0]: {}, self.possible_agents[1]: {}} - def step(self, actions: Dict[str, ActionType]) -> Tuple[ - Dict[str, ObsType], - Dict[str, float], - Dict[str, bool], - Dict[str, bool], - Dict[str, Dict[str, Any]], - ]: - assert self.current_battle1 is not None - assert self.current_battle2 is not None - if self.current_battle1.finished: - raise RuntimeError("Battle is already finished, call reset") - battle1 = copy.copy(self.current_battle1) - battle1.logger = None - battle2 = copy.copy(self.current_battle2) - battle2.logger = None - self.last_battle1 = battle1 - self.last_battle2 = battle2 - if self.agent1.waiting: - self._actions1.put(actions[self.agents[0]]) - if self.agent2.waiting: - self._actions2.put(actions[self.agents[1]]) - observations = { - self.agents[0]: self._observations1.get( - timeout=0.1, default=self.embed_battle(self.last_battle1) - ), - self.agents[1]: self._observations2.get( - timeout=0.1, default=self.embed_battle(self.last_battle2) - ), - } - assert self.current_battle1 == self.agent1.current_battle - reward = { - self.agents[0]: self.calc_reward(self.last_battle1, self.current_battle1), - self.agents[1]: self.calc_reward(self.last_battle2, self.current_battle2), - } - term1, trunc1 = self.calc_term_trunc(self.current_battle1) - term2, trunc2 = self.calc_term_trunc(self.current_battle2) - terminated = {self.agents[0]: term1, self.agents[1]: term2} - truncated = {self.agents[0]: trunc1, self.agents[1]: trunc2} - if self.current_battle1.finished: - self.agents = [] - return observations, reward, terminated, truncated, self.get_additional_info() - @staticmethod def calc_term_trunc(battle: AbstractBattle): terminated = False @@ -420,53 +477,6 @@ def calc_term_trunc(battle: AbstractBattle): truncated = True return terminated, truncated - def render(self, mode: str = "human"): - if self.current_battle1 is not None: - print( - " Turn %4d. | [%s][%3d/%3dhp] %10.10s - %10.10s [%3d%%hp][%s]" - % ( - self.current_battle1.turn, - "".join( - [ - "⦻" if mon.fainted else "●" - for mon in self.current_battle1.team.values() - ] - ), - self.current_battle1.active_pokemon.current_hp or 0, - self.current_battle1.active_pokemon.max_hp or 0, - self.current_battle1.active_pokemon.species, - self.current_battle1.opponent_active_pokemon.species, - self.current_battle1.opponent_active_pokemon.current_hp or 0, - "".join( - [ - "⦻" if mon.fainted else "●" - for mon in self.current_battle1.opponent_team.values() - ] - ), - ), - end="\n" if self.current_battle1.finished else "\r", - ) - - def close(self, purge: bool = True): - if self.current_battle1 is None or self.current_battle1.finished: - time.sleep(1) - if self.current_battle1 != self.agent1.current_battle: - self.current_battle1 = self.agent1.current_battle - if self.current_battle2 is None or self.current_battle2.finished: - time.sleep(1) - if self.current_battle2 != self.agent2.current_battle: - self.current_battle2 = self.agent2.current_battle - closing_task = asyncio.run_coroutine_threadsafe( - self._stop_challenge_loop(purge=purge), POKE_LOOP - ) - closing_task.result() - - def observation_space(self, agent: str) -> Space: - return self.observation_spaces[agent] - - def action_space(self, agent: str): - return self.action_spaces[agent] - def background_send_challenge(self, username: str): """ Sends a single challenge specified player. The function immediately returns