Revert #1154 (Fix NumPy DocTests) #1180

Closed
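Context: #1154 updated these docstring examples to match the scalar reprs introduced in NumPy 2.0 (NEP 51), under which scalars print as np.int64(0) or np.float64(0.5) rather than the bare value. This PR reverts those changes, restoring the pre-2.0 style outputs. A minimal sketch of the difference, assuming NumPy >= 2.0 is installed (the legacy print mode shown is a standard NumPy option, not part of this PR):

>>> import numpy as np
>>> np.int64(0)  # NumPy >= 2.0 includes the type in the repr
np.int64(0)
>>> np.set_printoptions(legacy="1.25")  # opt back into the pre-2.0 style
>>> np.int64(0)
0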
2 changes: 1 addition & 1 deletion gymnasium/spaces/dict.py
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
>>> from gymnasium.spaces import Dict, Box, Discrete
>>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
>>> observation_space.sample()
-{'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
+{'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}

With a nested dict:

4 changes: 2 additions & 2 deletions gymnasium/spaces/discrete.py
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
>>> from gymnasium.spaces import Discrete
>>> observation_space = Discrete(2, seed=42) # {0, 1}
>>> observation_space.sample()
-np.int64(0)
+0
>>> observation_space = Discrete(3, start=-1, seed=42) # {-1, 0, 1}
>>> observation_space.sample()
-np.int64(-1)
+-1
"""

def __init__(
4 changes: 2 additions & 2 deletions gymnasium/spaces/oneof.py
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
>>> from gymnasium.spaces import OneOf, Box, Discrete
>>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
>>> observation_space.sample() # the first element is the space index (Discrete in this case) and the second element is the sample from that space
-(np.int64(0), np.int64(0))
+(0, 0)
>>> observation_space.sample() # this time the Box space was sampled as index=1
-(np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
+(1, array([-0.00711833, -0.7257502 ], dtype=float32))
>>> observation_space[0]
Discrete(2)
>>> observation_space[1]
2 changes: 1 addition & 1 deletion gymnasium/spaces/tuple.py
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
>>> from gymnasium.spaces import Tuple, Box, Discrete
>>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
>>> observation_space.sample()
-(np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
+(0, array([-0.3991573 , 0.21649833], dtype=float32))
"""

def __init__(
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_observation.py
@@ -557,9 +557,9 @@ class MaxAndSkipObservation(
>>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
>>> wrapped_obs1, *_ = wrapped_env.step(1)
>>> np.all(obs0 == wrapped_obs0)
-np.True_
+True
>>> np.all(wrapped_obs1 == skip_and_max_obs)
-np.True_
+True

Change logs:
* v1.0.0 - Initially add
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_reward.py
@@ -58,7 +58,7 @@ class NormalizeReward(
...
>>> env.close()
>>> np.var(episode_rewards)
-np.float64(0.0008876301247721108)
+0.0008876301247721108

Example with the normalize reward wrapper:
>>> import numpy as np
@@ -76,7 +76,7 @@
>>> env.close()
>>> # will approach 0.99 with more episodes
>>> np.var(episode_rewards)
-np.float64(0.010162116476634746)
+0.010162116476634746

Change logs:
* v0.21.0 - Initially added
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_action.py
@@ -146,7 +146,7 @@ class RescaleAction(
>>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
>>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
>>> np.all(obs == wrapped_env_obs)
-np.True_
+True

Change logs:
* v0.15.4 - Initially added
8 changes: 4 additions & 4 deletions gymnasium/wrappers/transform_observation.py
@@ -594,11 +594,11 @@ class AddRenderObservation(
>>> obs, _ = env.reset(seed=123)
>>> image = env.render()
>>> np.all(obs == image)
-np.True_
+True
>>> obs, *_ = env.step(env.action_space.sample())
>>> image = env.render()
>>> np.all(obs == image)
-np.True_
+True

Example - Add the rendered image to the original observation as a dictionary item:
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -611,11 +611,11 @@
>>> obs["state"]
array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
>>> np.all(obs["pixels"] == env.render())
-np.True_
+True
>>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
>>> image = env.render()
>>> np.all(obs["pixels"] == image)
-np.True_
+True

Change logs:
* v0.15.0 - Initially added as ``PixelObservationWrapper``
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_reward.py
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorArgs):
>>> _ = env.reset()
>>> _, rew, _, _, _ = env.step(1)
>>> rew
-np.float64(0.5)
+0.5

Change logs:
* v1.0.0 - Initially added
4 changes: 2 additions & 2 deletions gymnasium/wrappers/vector/dict_info_to_list.py
@@ -54,13 +54,13 @@ class DictInfoToList(VectorWrapper):
>>> _ = envs.action_space.seed(123)
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
>>> infos
-{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
+{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423]), '_reward_ctrl': array([ True, True])}
>>> envs = DictInfoToList(envs)
>>> _ = envs.reset(seed=123)
>>> _ = envs.action_space.seed(123)
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
>>> infos
-[{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]
+[{'x_position': 0.0333221090036294, 'x_velocity': -0.06296527291998574, 'reward_run': -0.06296527291998574, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}]

Change logs:
* v0.24.0 - Initially added as ``VectorListInfo``
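Note on the hunk above: the reverted per-env reward_ctrl values show extra digits (-0.2450350284576416 instead of -0.24503504) because the float32 scalar is converted to a plain Python float, i.e. widened to double precision. A quick illustration, assuming NumPy is installed:

>>> import numpy as np
>>> float(np.float32(-0.24503504))  # float32 widened to a Python double
-0.2450350284576416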
6 changes: 3 additions & 3 deletions gymnasium/wrappers/vector/stateful_observation.py
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
>>> for _ in range(100):
... obs, *_ = envs.step(envs.action_space.sample())
>>> np.mean(obs)
-np.float32(0.024251968)
+0.024251968
>>> np.std(obs)
-np.float32(0.62259156)
+0.62259156
>>> envs.close()

Example with the normalize reward wrapper:
@@ -49,7 +49,7 @@
>>> for _ in range(100):
... obs, *_ = envs.step(envs.action_space.sample())
>>> np.mean(obs)
-np.float32(-0.2359734)
+-0.2359734
>>> np.std(obs)
np.float32(1.1938739)
>>> envs.close()
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/stateful_reward.py
@@ -50,9 +50,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
...
>>> envs.close()
>>> np.mean(episode_rewards)
-np.float64(-0.03359492141887935)
+-0.03359492141887935
>>> np.std(episode_rewards)
-np.float64(0.029028230434438706)
+0.029028230434438706

Example with the normalize reward wrapper:
>>> import gymnasium as gym
@@ -68,9 +68,9 @@
...
>>> envs.close()
>>> np.mean(episode_rewards)
-np.float64(-0.1598639586606745)
+-0.1598639586606745
>>> np.std(episode_rewards)
-np.float64(0.27800309628058434)
+0.27800309628058434
"""

def __init__(
2 changes: 1 addition & 1 deletion gymnasium/wrappers/vector/vectorize_action.py
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
>>> obs
array([[-0.46553135, -0.00142543],
[-0.498371 , -0.00715587],
-[-0.46515748, -0.00624371]], dtype=float32)
+[-0.4651575 , -0.00624371]], dtype=float32)

Example - With action transformation:
>>> import gymnasium as gym
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/vectorize_observation.py
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
>>> envs = gym.make_vec("MountainCar-v0", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.min()
-np.float32(-0.46352962)
+-0.46352962
>>> obs.max()
-np.float32(0.0)
+0.0
>>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
>>> obs, info = envs.reset(seed=123)
>>> obs.min()
-np.float32(-0.90849805)
+-0.90849805
>>> obs.max()
-np.float32(0.0)
+0.0
>>> envs.close()
"""

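An alternative to editing every docstring when the NumPy repr style changes is to pin the style once in the doctest harness. A minimal sketch of that idea, assuming a pytest-based doctest run; the conftest.py hook below is illustrative, not Gymnasium's actual setup:

# conftest.py -- hypothetical; pins scalar reprs for all collected doctests
import numpy as np

def pytest_configure(config):
    # NumPy >= 2.0 prints scalars as np.int64(0); legacy="1.25" restores
    # the bare-value reprs that the reverted docstrings expect. The option
    # only exists on NumPy >= 2.0, hence the version guard.
    if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
        np.set_printoptions(legacy="1.25")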