Revert #1154 (Fix NumPy DocTests)
pseudo-rnd-thoughts committed Sep 28, 2024
1 parent c8de1c7 commit be18c38
Showing 14 changed files with 30 additions and 30 deletions.
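Every hunk in this diff tracks the same underlying change: NumPy 2.0 altered the repr of scalar values (e.g. `np.int64(0)` instead of `0`, `np.True_` instead of `True`), PR #1154 updated the doctest expected outputs to match, and this commit reverts those expectations back to the NumPy 1.x-style output. A minimal sketch of the difference, assuming NumPy >= 2.0 is installed; the `legacy` print option shown is a NumPy 2.x feature, and using it as a doctest workaround is this note's suggestion, not part of the commit:

```python
import numpy as np

# Under NumPy >= 2.0, scalar reprs spell out the type:
#   repr(np.int64(0))     -> 'np.int64(0)'
#   repr(np.True_)        -> 'np.True_'
#   repr(np.float64(0.5)) -> 'np.float64(0.5)'
# Under NumPy 1.x the same values print as 0, True, and 0.5,
# which is what the reverted doctest outputs below expect.
print(repr(np.int64(0)))

# NumPy 2.x ships a legacy printing mode that restores the
# 1.x-style scalar repr, one way to keep pre-2.0 doctests passing:
np.set_printoptions(legacy="1.25")
print(repr(np.int64(0)))  # prints 0 again under NumPy >= 2.0
```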
gymnasium/spaces/dict.py (1 addition & 1 deletion)
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
>>> from gymnasium.spaces import Dict, Box, Discrete
>>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
>>> observation_space.sample()
- {'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
+ {'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
With a nested dict:
gymnasium/spaces/discrete.py (2 additions & 2 deletions)
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
>>> from gymnasium.spaces import Discrete
>>> observation_space = Discrete(2, seed=42) # {0, 1}
>>> observation_space.sample()
- np.int64(0)
+ 0
>>> observation_space = Discrete(3, start=-1, seed=42) # {-1, 0, 1}
>>> observation_space.sample()
- np.int64(-1)
+ -1
"""

def __init__(
gymnasium/spaces/oneof.py (2 additions & 2 deletions)
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
>>> from gymnasium.spaces import OneOf, Box, Discrete
>>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
>>> observation_space.sample() # the first element is the space index (Discrete in this case) and the second element is the sample from Discrete
- (np.int64(0), np.int64(0))
+ (0, 0)
>>> observation_space.sample() # this time the Box space was sampled as index=1
- (np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
+ (1, array([-0.00711833, -0.7257502 ], dtype=float32))
>>> observation_space[0]
Discrete(2)
>>> observation_space[1]
gymnasium/spaces/tuple.py (1 addition & 1 deletion)
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
>>> from gymnasium.spaces import Tuple, Box, Discrete
>>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
>>> observation_space.sample()
- (np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
+ (0, array([-0.3991573 , 0.21649833], dtype=float32))
"""

def __init__(
gymnasium/wrappers/stateful_observation.py (2 additions & 2 deletions)
@@ -557,9 +557,9 @@ class MaxAndSkipObservation(
>>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
>>> wrapped_obs1, *_ = wrapped_env.step(1)
>>> np.all(obs0 == wrapped_obs0)
- np.True_
+ True
>>> np.all(wrapped_obs1 == skip_and_max_obs)
- np.True_
+ True
Change logs:
* v1.0.0 - Initially added
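The doctest above compares the wrapper's output against a manually computed `skip_and_max_obs`. For reference, here is a minimal sketch of the skip-and-max scheme such a wrapper implements (repeat the action for several frames, then take the elementwise max of the last two observations); the function name and `skip` default are illustrative, not the wrapper's internals:

```python
import numpy as np
import gymnasium as gym

def skip_and_max_step(env: gym.Env, action, skip: int = 4):
    """Repeat ``action`` for ``skip`` frames and return the elementwise
    max of the last two observations, accumulating the reward."""
    obs_buffer = []
    total_reward = 0.0
    terminated = truncated = False
    info = {}
    for _ in range(skip):
        obs, reward, terminated, truncated, info = env.step(action)
        obs_buffer.append(obs)
        total_reward += float(reward)
        if terminated or truncated:
            break
    # Maxing the final two frames smooths over rendering artifacts
    # (e.g. flickering sprites in Atari-style environments).
    if len(obs_buffer) >= 2:
        max_obs = np.maximum(obs_buffer[-1], obs_buffer[-2])
    else:
        max_obs = obs_buffer[-1]
    return max_obs, total_reward, terminated, truncated, info
```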
gymnasium/wrappers/stateful_reward.py (2 additions & 2 deletions)
@@ -58,7 +58,7 @@ class NormalizeReward(
...
>>> env.close()
>>> np.var(episode_rewards)
- np.float64(0.0008876301247721108)
+ 0.0008876301247721108
Example with the normalize reward wrapper:
>>> import numpy as np
@@ -76,7 +76,7 @@ class NormalizeReward(
>>> env.close()
>>> # will approach 0.99 with more episodes
>>> np.var(episode_rewards)
- np.float64(0.010162116476634746)
+ 0.010162116476634746
Change logs:
* v0.21.0 - Initially added
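The variances quoted in the doctest come from scaling each reward by a running estimate of the discounted return's standard deviation, which is why they approach `gamma**2`-scaled targets with more episodes. A minimal sketch of that scheme, assuming a Welford-style streaming variance; the class and its internals are illustrative, not the wrapper's actual implementation:

```python
import numpy as np

class RewardNormalizer:
    """Scale rewards by the running std of the discounted return."""

    def __init__(self, gamma: float = 0.99, epsilon: float = 1e-8):
        self.gamma = gamma
        self.epsilon = epsilon
        self.ret = 0.0   # exponentially discounted return
        self.count = 0
        self.mean = 0.0
        self.m2 = 0.0    # sum of squared deviations (Welford)

    def __call__(self, reward: float) -> float:
        self.ret = self.ret * self.gamma + reward
        # Welford update of the return's running mean/variance.
        self.count += 1
        delta = self.ret - self.mean
        self.mean += delta / self.count
        self.m2 += delta * (self.ret - self.mean)
        var = self.m2 / self.count if self.count > 1 else 1.0
        return reward / np.sqrt(var + self.epsilon)
```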
gymnasium/wrappers/transform_action.py (1 addition & 1 deletion)
@@ -146,7 +146,7 @@ class RescaleAction(
>>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
>>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
>>> np.all(obs == wrapped_env_obs)
- np.True_
+ True
Change logs:
* v0.15.4 - Initially added
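For reference, the rescaling in the doctest is a plain affine map from the wrapper's `[min_action, max_action]` interval onto the environment's native action bounds. A minimal sketch of that map; the function name is illustrative:

```python
import numpy as np

def rescale_action(action, min_action, max_action, low, high):
    """Affinely map ``action`` from [min_action, max_action]
    onto the environment's native bounds [low, high]."""
    fraction = (np.asarray(action) - min_action) / (max_action - min_action)
    return low + fraction * (high - low)
```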
gymnasium/wrappers/transform_observation.py (4 additions & 4 deletions)
@@ -594,11 +594,11 @@ class AddRenderObservation(
>>> obs, _ = env.reset(seed=123)
>>> image = env.render()
>>> np.all(obs == image)
- np.True_
+ True
>>> obs, *_ = env.step(env.action_space.sample())
>>> image = env.render()
>>> np.all(obs == image)
- np.True_
+ True
Example - Add the rendered image to the original observation as a dictionary item:
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -611,11 +611,11 @@ class AddRenderObservation(
>>> obs["state"]
array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
>>> np.all(obs["pixels"] == env.render())
- np.True_
+ True
>>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
>>> image = env.render()
>>> np.all(obs["pixels"] == image)
- np.True_
+ True
Change logs:
* v0.15.0 - Initially added as ``PixelObservationWrapper``
gymnasium/wrappers/transform_reward.py (1 addition & 1 deletion)
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorArgs):
>>> _ = env.reset()
>>> _, rew, _, _, _ = env.step(1)
>>> rew
- np.float64(0.5)
+ 0.5
Change logs:
* v1.0.0 - Initially added
gymnasium/wrappers/vector/dict_info_to_list.py (2 additions & 2 deletions)
@@ -54,13 +54,13 @@ class DictInfoToList(VectorWrapper):
>>> _ = envs.action_space.seed(123)
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
>>> infos
- {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
+ {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423]), '_reward_ctrl': array([ True, True])}
>>> envs = DictInfoToList(envs)
>>> _ = envs.reset(seed=123)
>>> _ = envs.action_space.seed(123)
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
>>> infos
- [{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]
+ [{'x_position': 0.0333221090036294, 'x_velocity': -0.06296527291998574, 'reward_run': -0.06296527291998574, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}]
Change logs:
* v0.24.0 - Initially added as ``VectorListInfo``
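The doctest shows the transformation this wrapper performs: a dict of per-key arrays, where `_`-prefixed boolean masks mark which sub-environments actually produced a value, becomes a list with one plain dict per sub-environment. A minimal sketch of that conversion, assuming the `_`-mask convention visible in the output above; the helper name is illustrative:

```python
import numpy as np

def dict_info_to_list(infos: dict, num_envs: int) -> list[dict]:
    """Convert vectorized dict-style infos into one dict per sub-env."""
    list_infos = [{} for _ in range(num_envs)]
    for key, values in infos.items():
        if key.startswith("_"):
            continue  # mask entries are consumed with their data key below
        mask = infos.get(f"_{key}", np.ones(num_envs, dtype=bool))
        for i in range(num_envs):
            if mask[i]:  # only envs that actually reported this key
                list_infos[i][key] = values[i]
    return list_infos
```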
gymnasium/wrappers/vector/stateful_observation.py (3 additions & 3 deletions)
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
>>> for _ in range(100):
... obs, *_ = envs.step(envs.action_space.sample())
>>> np.mean(obs)
- np.float32(0.024251968)
+ 0.024251968
>>> np.std(obs)
- np.float32(0.62259156)
+ 0.62259156
>>> envs.close()
Example with the normalize reward wrapper:
@@ -49,7 +49,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
>>> for _ in range(100):
... obs, *_ = envs.step(envs.action_space.sample())
>>> np.mean(obs)
- np.float32(-0.2359734)
+ -0.2359734
>>> np.std(obs)
np.float32(1.1938739)
>>> envs.close()
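The means and standard deviations in the doctest drift toward 0 and 1 because the wrapper standardizes observations against running statistics that are updated on every batch. A minimal sketch of such a running mean/std scheme, using the standard parallel-variance merge; the class is illustrative, not the wrapper's internals:

```python
import numpy as np

class RunningMeanStd:
    """Batched running mean/variance via the parallel-variance merge."""

    def __init__(self, shape, epsilon: float = 1e-4):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon  # avoids division by zero before any update

    def update(self, batch: np.ndarray) -> None:
        batch_mean = batch.mean(axis=0)
        batch_var = batch.var(axis=0)
        batch_count = batch.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        self.mean = self.mean + delta * batch_count / total
        m2 = (self.var * self.count + batch_var * batch_count
              + delta**2 * self.count * batch_count / total)
        self.var = m2 / total
        self.count = total

def normalize(obs: np.ndarray, rms: RunningMeanStd, eps: float = 1e-8) -> np.ndarray:
    rms.update(obs)  # obs has shape (num_envs, *obs_shape)
    return (obs - rms.mean) / np.sqrt(rms.var + eps)
```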
gymnasium/wrappers/vector/stateful_reward.py (4 additions & 4 deletions)
@@ -50,9 +50,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
...
>>> envs.close()
>>> np.mean(episode_rewards)
- np.float64(-0.03359492141887935)
+ -0.03359492141887935
>>> np.std(episode_rewards)
- np.float64(0.029028230434438706)
+ 0.029028230434438706
Example with the normalize reward wrapper:
>>> import gymnasium as gym
@@ -68,9 +68,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
...
>>> envs.close()
>>> np.mean(episode_rewards)
- np.float64(-0.1598639586606745)
+ -0.1598639586606745
>>> np.std(episode_rewards)
- np.float64(0.27800309628058434)
+ 0.27800309628058434
"""

def __init__(
gymnasium/wrappers/vector/vectorize_action.py (1 addition & 1 deletion)
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
>>> obs
array([[-0.46553135, -0.00142543],
[-0.498371 , -0.00715587],
- [-0.46515748, -0.00624371]], dtype=float32)
+ [-0.4651575 , -0.00624371]], dtype=float32)
Example - With action transformation:
>>> import gymnasium as gym
gymnasium/wrappers/vector/vectorize_observation.py (4 additions & 4 deletions)
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
>>> envs = gym.make_vec("MountainCar-v0", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.min()
- np.float32(-0.46352962)
+ -0.46352962
>>> obs.max()
- np.float32(0.0)
+ 0.0
>>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
>>> obs, info = envs.reset(seed=123)
>>> obs.min()
- np.float32(-0.90849805)
+ -0.90849805
>>> obs.max()
- np.float32(0.0)
+ 0.0
>>> envs.close()
"""

