-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgym_env_wrapper.py
653 lines (575 loc) · 30.3 KB
/
gym_env_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
import gymnasium as gym
import copy
import numpy as np
import sys
from gymnasium.spaces import Box, Tuple
from gymnasium.wrappers import AtariPreprocessing
from mdp_playground.envs.rl_toy_env import RLToyEnv
import warnings
import PIL.ImageDraw as ImageDraw
import PIL.Image as Image
from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM
import logging
# Needed from Gymnasium v1.0.0 onwards
import ale_py
gym.register_envs(ale_py) # optional, helpful for IDEs or pre-commit
# def get_gym_wrapper(base_class):
class GymEnvWrapper(gym.Env):
    """Wraps a Gym(nasium) environment so that its MDP Playground dimensions can be modified.

    Please see [`example.py`](example.py) for some simple examples of how to
    use this class. The values for these dimensions are passed in a config
    dict as for mdp_playground.envs.RLToyEnv. The description for the supported
    dimensions below can be found in mdp_playground/envs/rl_toy_env.py.

    Currently supported dimensions:
        transition noise
        reward delay
        reward noise
        reward scale
        reward shift
        terminal state reward
        image_transforms (for discrete environments with image observations)

    The wrapper is pretty general and can be applied to any Gym Environment.
    The environment should be instantiated and passed as the 1st argument to
    the __init__ method of this class. If using this wrapper with Atari,
    additional keys may be added specifying either atari_preprocessing = True
    or wrap_deepmind_ray = True. These would use the AtariPreprocessing wrapper
    from Gym or the wrap_deepmind() wrapper from Ray RLlib, respectively.

    For AtariPreprocessing, additional key-value pairs specifying grayscale_obs
    and frame_skip may be provided. These have the same meaning as in
    AtariPreprocessing.
    """

    # Should not be a gym.Wrapper because 1) gym.Wrapper has member variables
    # observation_space and action_space while here with irrelevant_features
    # we would have multiple observation_spaces and this could cause conflict
    # with code that assumes any subclass of gym.Wrapper should have these member
    # variables. However, it _should_ be at least a gym.Env.
    # Following comment based on the old get_gym_wrapper(base_class) code:
    # Does it need to be a subclass of base_class because some external code
    # may check if it's an AtariEnv, for instance, and do further stuff based
    # on that?
def __init__(self, env, **config):
    """Wrap ``env`` and configure the MDP Playground dimensions from ``config``.

    Parameters
    ----------
    env : gym.Env
        The already instantiated environment to wrap.
    **config
        Optional settings. Keys read by this method:

        * ``log_level``, ``log_filename``: logger level and optional log file.
        * ``seed``: seed for this wrapper's RNG. It is also passed to
          ``env.seed()`` when the wrapped env exposes a ``seed`` attribute.
        * ``delay``: non-negative number of steps by which rewards are delayed.
        * ``transition_noise``: when ``state_space_type`` is ``"continuous"``,
          either a callable ``f(state, action, rng)`` or the std. dev. of
          zero-mean Gaussian noise; otherwise a probability in [0.0, 1.0].
        * ``reward_noise``: a callable ``f(state, action, rng)`` or the
          std. dev. of zero-mean Gaussian noise.
        * ``reward_scale``, ``reward_shift``, ``term_state_reward``.
        * ``image_transforms`` together with ``image_padding``,
          ``image_sh_quant``, ``image_ro_quant`` and ``image_scale_range``.
        * ``wrap_deepmind_ray`` or ``atari_preprocessing`` (the latter with
          ``frame_skip``, ``grayscale_obs`` and ``image_width``).
        * ``irrelevant_features``: config dict for an ``RLToyEnv`` that runs
          alongside ``env`` (plus ``irr_state_space_dim`` for continuous
          environments).
        * ``state_space_type``: read here whenever ``transition_noise``,
          ``image_transforms`` or ``irrelevant_features`` is given.

    Raises
    ------
    AssertionError
        If ``delay`` is negative, if a discrete ``transition_noise`` is outside
        [0.0, 1.0], if ``image_transforms`` is used with a non-discrete env, or
        if the continuous irrelevant-features set-up has mismatching
        observation dtypes or a non-1-D action space.
    """
    self.config = copy.deepcopy(config)
    # self.env = config["env"]
    self.env = env

    # --- logging ---
    self.log_level = config.get("log_level", logging.NOTSET)
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(self.log_level)
    if "log_filename" in config:
        # Only attach a file handler once per (module-level) logger.
        if not self.logger.handlers:
            self.log_filename = config["log_filename"]
            log_file_handler = logging.FileHandler(self.log_filename)
            self.logger.addHandler(log_file_handler)
            print("Logger logging to:", self.log_filename)

    # --- seeding ---
    seed_int = config.get("seed")
    self.seed(seed_int)  # #seed
    # IMP Move below code from here to seed()? Because if seed is called
    # during the run of an env, the expectation is that all obs., act. space,
    # etc. seeds are set during that call? Only Atari in Gym seems to do something
    # similar, the others I saw there don't seem to set seed for obs., act. spaces.
    if "seed" in dir(self.env):  # hack
        # #seed ###IMP Apparently Atari also has a seed. :/ Without this, for
        # beam_rider(?), about 1 in 5 times I got reward of 88.0 and 44.0 the
        # remaining times with the same action sequence!! With setting this
        # seed, I got the same reward of 44.0 when I ran about 20 times.
        # ##TODO If this is really a wrapper, should it be modifying the seed
        # of the env?
        self.env.seed(seed_int)
    obs_space_seed = self._np_random.integers(sys.maxsize).item()  # random
    act_space_seed = self._np_random.integers(sys.maxsize).item()  # random
    self.env.observation_space.seed(obs_space_seed)  # seed
    self.env.action_space.seed(act_space_seed)  # seed

    # if "dummy_eval" in config: #hack
    #     del config["dummy_eval"]

    # --- reward delay ---
    self.delay = config.get("delay", 0)
    assert self.delay >= 0
    # Always initialised (empty when there is no delay); reset() re-creates it.
    self.reward_buffer = [0.0] * (self.delay)

    # --- transition noise ---
    if "transition_noise" in config:
        if config["state_space_type"] == "continuous":
            if callable(config["transition_noise"]):
                self.transition_noise = config["transition_noise"]
            else:
                p_noise_std = config["transition_noise"]
                self.transition_noise = lambda s, a, rng: rng.normal(
                    0, p_noise_std, size=s.shape
                )
        else:  # discrete env
            self.transition_noise = config["transition_noise"]
            assert self.transition_noise <= 1.0 and self.transition_noise >= 0.0, (
                "transition_noise must be a value in [0.0, 1.0] when env is discrete, it was:"
                + str(self.transition_noise)
            )
    else:
        self.transition_noise = None

    # --- reward noise ---
    if "reward_noise" in config:
        if callable(config["reward_noise"]):
            self.reward_noise = config["reward_noise"]
        else:
            reward_noise_std = config["reward_noise"]
            self.reward_noise = lambda s, a, rng: rng.normal(0, reward_noise_std)
    else:
        self.reward_noise = None

    # --- reward scale / shift / terminal state reward ---
    self.reward_scale = config.get("reward_scale", 1.0)
    self.reward_shift = config.get("reward_shift", 0.0)
    self.term_state_reward = config.get("term_state_reward", 0.0)

    # --- image transforms ---
    if "image_transforms" not in config:
        self.image_transforms = False
    else:
        assert config["state_space_type"] == "discrete", (
            "Image transforms are only supported for discrete envs with "
            "image observations."
        )
        self.image_transforms = config["image_transforms"]

        obs_shape = self.env.observation_space.shape
        if len(obs_shape) != 3:
            # str() is required: concatenating the shape tuple directly to a
            # str raises TypeError instead of emitting the warning.
            warnings.warn(
                "observation_space.shape = "
                + str(obs_shape)
                + ". Its length was expected to be 3 (width, height, channels) "
                + "for environments with image representations."
            )

        self.image_padding = config.get("image_padding", 20)

        if "image_sh_quant" not in config:
            if "shift" in self.image_transforms:
                warnings.warn(
                    "Setting image shift quantisation to the "
                    "default of 1, since no config value was provided for it."
                )
                self.image_sh_quant = 1
            else:
                self.image_sh_quant = None
        else:
            self.image_sh_quant = config["image_sh_quant"]

        if "image_ro_quant" not in config:
            if "rotate" in self.image_transforms:
                warnings.warn(
                    "Setting image rotate quantisation to the "
                    "default of 1, since no config value was provided for it."
                )
                self.image_ro_quant = 1
            else:
                self.image_ro_quant = None
        else:
            self.image_ro_quant = config["image_ro_quant"]

        if "image_scale_range" not in config:
            if "scale" in self.image_transforms:
                warnings.warn(
                    "Setting image scale range to the default "
                    "of (0.5, 1.5), since no config value was provided for it."
                )
                self.image_scale_range = (0.5, 1.5)
            else:
                self.image_scale_range = None
        else:
            self.image_scale_range = config["image_scale_range"]

    # --- optional Atari preprocessing of the wrapped env ---
    if (
        "wrap_deepmind_ray" in config and config["wrap_deepmind_ray"]
    ):  # hack ##TODO remove?
        from ray.rllib.env.atari_wrappers import wrap_deepmind

        self.env = wrap_deepmind(self.env, dim=42, framestack=True)
    elif "atari_preprocessing" in config and config["atari_preprocessing"]:
        self.frame_skip = config.get("frame_skip", 4)  # 4: default for AtariPreprocessing
        self.grayscale_obs = config.get("grayscale_obs", False)
        self.image_width = config.get("image_width", 84)  # 84: Atari default

        # Use AtariPreprocessing with frame_skip
        # noop_max set to 1 because we want to keep the vanilla env as
        # deterministic as possible and setting it 0 was not allowed. ##TODO
        # noop_max=0 is possible in new Gym version, so update Gym version.
        self.env = AtariPreprocessing(
            self.env,
            frame_skip=self.frame_skip,
            grayscale_obs=self.grayscale_obs,
            noop_max=1,
            screen_size=self.image_width,
        )
        print("self.env.noop_max set to: ", self.env.noop_max)

    # --- irrelevant features and the resulting observation/action spaces ---
    if "irrelevant_features" in config:
        # Work on a copy so that the caller's nested dict is not modified when
        # the seed / state_space_dim entries are filled in below.
        irr_toy_env_conf = dict(config["irrelevant_features"])
        if "seed" not in irr_toy_env_conf:
            irr_toy_env_conf["seed"] = self._np_random.integers(
                sys.maxsize
            ).item()  # random

        if config["state_space_type"] != "discrete":  # cont. env
            # This is a bit hacky because we need to define the state_space_dim
            # of the irrelevant toy env in the "base" config and not the nested
            # irrelevant_features dict inside the base config to be compatible
            # with the config_processor of MDPP which requires variable config
            # to be in the "base" config.
            irr_toy_env_conf["state_space_dim"] = config[
                "irr_state_space_dim"
            ]  # #hack

        self.irr_toy_env = RLToyEnv(**irr_toy_env_conf)

        if config["state_space_type"] == "discrete":
            self.action_space = Tuple(
                (self.env.action_space, self.irr_toy_env.action_space)
            )
            self.observation_space = Tuple(
                (self.env.observation_space, self.irr_toy_env.observation_space)
            )  # TODO for image observations, concatenate to 1 obs. space here and in step() and reset()?
        else:  # cont. env # TODO Check the test case added for cont. irr features case and code for it in run_experiments.py.
            env_obs_low = self.env.observation_space.low
            env_obs_high = self.env.observation_space.high
            env_obs_dtype = env_obs_low.dtype

            irr_env_obs_low = self.irr_toy_env.observation_space.low
            irr_env_obs_high = self.irr_toy_env.observation_space.high
            irr_env_obs_dtype = self.irr_toy_env.observation_space.low.dtype
            assert env_obs_dtype == irr_env_obs_dtype, (
                "Datatypes of base env and irrelevant toy env should match. Were: "
                + str(env_obs_dtype)
                + ", "
                + str(irr_env_obs_dtype)
            )
            ext_low = np.concatenate((env_obs_low, irr_env_obs_low))
            ext_high = np.concatenate((env_obs_high, irr_env_obs_high))
            self.observation_space = Box(
                low=ext_low, high=ext_high, dtype=env_obs_dtype
            )

            env_act_low = self.env.action_space.low
            env_act_high = self.env.action_space.high
            env_act_dtype = env_act_low.dtype
            # Stored because step() uses it to split the joint action vector.
            self.env_act_shape = env_act_low.shape
            assert (
                len(self.env_act_shape) == 1
            ), "Length of shape of action space should be 1."

            irr_env_act_low = self.irr_toy_env.action_space.low
            irr_env_act_high = self.irr_toy_env.action_space.high
            # assert env_obs_dtype == env_act_dtype, "Datatypes of obs. and act. of
            # base env should match. Were: " + str(env_obs_dtype) + ", " +
            # str(env_act_dtype) #TODO Apparently, observations are np.float64 and
            # actions np.float32 for Mujoco.
            ext_low = np.concatenate((env_act_low, irr_env_act_low))
            ext_high = np.concatenate((env_act_high, irr_env_act_high))
            self.action_space = Box(
                low=ext_low, high=ext_high, dtype=env_act_dtype
            )  # TODO Use BoxExtended here and above?

        self.observation_space.seed(obs_space_seed)  # #seed
        self.action_space.seed(act_space_seed)  # #seed
    else:  # no irrelevant features
        self.action_space = self.env.action_space

        if self.image_transforms:
            env_obs_low = self.env.observation_space.low
            env_obs_dtype = env_obs_low.dtype
            env_obs_shape = env_obs_low.shape
            # The transformed image is the original one plus padding on all sides.
            ext_low_shape = (
                env_obs_shape[0] + self.image_padding * 2,
                env_obs_shape[1] + self.image_padding * 2,
                env_obs_shape[2],
            )
            # #hardcoded stuff next
            ext_low = np.zeros(shape=(ext_low_shape))
            ext_high = np.ones(shape=(ext_low_shape)) * 255
            self.observation_space = Box(
                low=ext_low, high=ext_high, dtype=env_obs_dtype
            )
        else:  # no image transforms
            self.observation_space = self.env.observation_space

    self.total_episodes = 0

    # if "action_loss_weight" in config: #hack
    #     del config["action_loss_weight"]
    # if "action_space_max" in config: #hack
    #     action_space_max = config["action_space_max"]
    #     del config["action_space_max"]
    # if "time_unit" in config: #hack
    #     time_unit = config["time_unit"]
    #     del config["time_unit"]
    # if "dummy_seed" in config: #hack
    #     del config["dummy_seed"]

    super(GymEnvWrapper, self).__init__()
# if "action_space_max" in locals():
# print("Setting Mujoco self.action_space.low, self.action_space.high from:", self.action_space.low, self.action_space.high)
# self.action_space.low *= action_space_max
# self.action_space.high *= action_space_max
# print("to:", self.action_space.low, self.action_space.high)
# if base_class == HalfCheetahEnv and action_space_max >= 4: #hack
# self.model.opt.timestep /= 2 # 0.005
# self.frame_skip *= 2
# print("Setting Mujoco timestep to", self.model.opt.timestep, "half of the usual to avoid instabilities. At the same time action repeat increased to twice its usual.")
# if "time_unit" in locals(): #hack In HalfCheetah, this is needed because the reward function is dependent on the time_unit because it depends on velocity achieved which depends on amount of time torque was applied. In Pusher, Reacher, it is also needed because the reward is similar to the distance from current position to goal at _each_ step, which means if we calculate the reward multiple times in the same amount of "real" time, we'd need to average out the reward the more times we calculate the reward in the same amount of "real" time (i.e., when we have shorter acting timesteps). This is not the case with the toy enviroments because there the reward is amount of distance moved from current position to goal in the current timestep, so it's dependent on "real" time and not on acting timesteps.
# self.frame_skip *= time_unit
# self.frame_skip = int(self.frame_skip)
# self._ctrl_cost_weight *= time_unit
# self._forward_reward_weight *= time_unit
# print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.")
def step(self, action):
    """Advance the wrapped environment by one step and apply the configured dimensions.

    Parameters
    ----------
    action
        Action for the wrapped env. With ``irrelevant_features`` configured it
        is the joint action: a ``(base_action, irrelevant_action)`` pair for
        discrete envs, or one vector whose first ``self.env_act_shape[0]``
        entries belong to the wrapped env for continuous envs.

    Returns
    -------
    tuple
        ``(next_obs, reward, done, trunc, info)`` where ``done``, ``trunc`` and
        ``info`` come from the wrapped env, and ``reward`` has had delay,
        noise, scale and shift applied.
    """
    # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action)
    self.total_transitions_episode += 1

    is_discrete = self.config["state_space_type"] == "discrete"
    has_irrelevant = "irrelevant_features" in self.config

    if is_discrete:
        if self.transition_noise:
            n_actions = self.env.action_space.n
            # The noise distribution is defined over the wrapped env's actions,
            # so with irrelevant features only the base component is perturbed.
            base_action = action[0] if has_irrelevant else action
            probs = (
                np.ones(shape=(n_actions,))
                * self.transition_noise
                / (n_actions - 1)
            )
            probs[base_action] = 1 - self.transition_noise
            noisy_action = self._np_random.choice(
                n_actions, size=1, p=probs
            ).item()  # random
            if noisy_action != base_action:
                self.total_noisy_transitions_episode += 1
            action = (noisy_action, action[1]) if has_irrelevant else noisy_action
    else:  # cont. envs
        noise_in_transition = (
            self.transition_noise(self.curr_state, action, self._np_random)
            if self.transition_noise
            else 0.0
        )  # #random
        self.total_abs_noise_in_transition_episode += np.abs(noise_in_transition)
        self.logger.debug(
            "total_transitions_episode: %s Noise in transition: %s",
            self.total_transitions_episode,
            noise_in_transition,
        )

    if has_irrelevant:
        if is_discrete:
            next_state, reward, done, trunc, info = self.env.step(action[0])
            next_state_irr, _, _, _, _ = self.irr_toy_env.step(action[1])
            next_state = tuple([next_state, next_state_irr])
            next_obs = next_state
        else:  # cont. env
            # env_act_shape is the shape of the underlying env's action space and we
            # sub-select those dimensions from the total action space next and apply
            # to the underlying env:
            split = self.env_act_shape[0]
            next_state, reward, done, trunc, info = self.env.step(action[:split])
            next_state_irr, _, _, _, _ = self.irr_toy_env.step(action[split:])
            next_state = np.concatenate((next_state, next_state_irr))
            next_obs = next_state.copy()
    else:  # no irrelevant features
        next_state, reward, done, trunc, info = self.env.step(action)
        next_obs = next_state if is_discrete else next_state.copy()

    # I think this adds noise whether or not irrelevant features are present. #TODO Add test
    if self.config["state_space_type"] == "continuous":
        next_obs += noise_in_transition

    if self.image_transforms:
        next_obs = self.get_transformed_image(next_state)

    if done:
        # if episode is finished return the rewards that were delayed and not
        # handed out before ##TODO add test case for this
        # reward_buffer is a plain list, so it has to be converted before the
        # element-wise arithmetic (list * float raises TypeError).
        delayed_rewards = np.asarray(self.reward_buffer, dtype=float)
        reward += np.sum(delayed_rewards * self.reward_scale + self.reward_shift)
        # NOTE(review): reward_scale / reward_shift are applied once more to
        # the whole reward further below - confirm that this is intended.
        reward += self.term_state_reward * self.reward_scale  # Scale before or after?
    else:
        # Queue the fresh reward and hand out the one from `delay` steps ago.
        self.reward_buffer.append(reward)
        reward = self.reward_buffer.pop(0)

    # random ###TODO Would be better to parameterise this in terms of state,
    # action and time_step as well. Would need to change implementation to
    # have a queue for the rewards achieved and then pick the reward that was
    # generated delay timesteps ago.
    noise_in_reward = (
        self.reward_noise(self.curr_state, action, self._np_random)
        if self.reward_noise
        else 0
    )
    self.logger.info("Noise in reward: %s", noise_in_reward)
    self.total_abs_noise_in_reward_episode += np.abs(noise_in_reward)
    self.total_reward_episode += reward
    reward += noise_in_reward
    reward *= self.reward_scale
    reward += self.reward_shift

    self.logger.debug(
        "sas'o'r: %s\n%s\n%s\n%s \n%s",
        self.curr_state,
        action,
        next_state,
        next_obs,
        reward,
    )
    # Stored for the state-dependent noise functions in the next call.
    self.curr_state = next_state
    return next_obs, reward, done, trunc, info
def reset(self, seed=None, options=None):
    """Reset the wrapped environment(s) and the per-episode bookkeeping.

    From Gymnasium v26, the reset method has a seed parameter.

    Parameters
    ----------
    seed : int, optional
        Passed on as ``seed=`` to the wrapped env's ``reset()`` and, if
        present, to the irrelevant-features toy env's ``reset()``.
    options : dict, optional
        Passed on as ``options=`` to the wrapped env's ``reset()`` only when it
        is not None, so callers using the Gymnasium
        ``reset(seed=..., options=...)`` form are supported.

    Returns
    -------
    tuple
        ``(observation, info)``. With ``irrelevant_features`` configured, the
        observation combines both envs (a tuple for discrete envs, a
        concatenated array otherwise) and ``info`` is a tuple of both infos.
    """
    # on episode "end" stuff (to not be invoked when reset() called when
    # self.total_episodes = 0; end is in quotes because it may not be a true
    # episode end reached by reaching a terminal state, but reset() may have
    # been called in the middle of an episode):
    if not self.total_episodes == 0:
        self.logger.info(
            "Noise stats for previous episode num.: %s"
            " (total abs. noise in rewards, total abs. noise in transitions,"
            " total reward, total noisy transitions, total transitions):"
            " %s %s %s %s %s",
            self.total_episodes,
            self.total_abs_noise_in_reward_episode,
            self.total_abs_noise_in_transition_episode,
            self.total_reward_episode,
            self.total_noisy_transitions_episode,
            self.total_transitions_episode,
        )

    # on episode start stuff:
    self.reward_buffer = [0.0] * (self.delay)
    self.total_episodes += 1
    self.total_abs_noise_in_reward_episode = 0
    self.total_abs_noise_in_transition_episode = 0  # only present in continuous spaces
    self.total_noisy_transitions_episode = 0  # only present in discrete spaces
    self.total_reward_episode = 0
    self.total_transitions_episode = 0

    # `options` is only forwarded when given, so wrapped envs whose reset()
    # accepts just `seed` keep working.
    base_reset_kwargs = {"seed": seed}
    if options is not None:
        base_reset_kwargs["options"] = options

    if "irrelevant_features" in self.config:
        base_state, base_info = self.env.reset(**base_reset_kwargs)
        irr_state, irr_info = self.irr_toy_env.reset(seed=seed)
        infos = tuple([base_info, irr_info])
        if self.config["state_space_type"] == "discrete":
            reset_state = (tuple([base_state, irr_state]), infos)
        else:
            reset_state = (np.concatenate((base_state, irr_state)), infos)
    else:
        reset_state = self.env.reset(**base_reset_kwargs)

    if self.image_transforms:
        reset_state = (self.get_transformed_image(reset_state[0]), reset_state[1])

    # Need to store the state to be able to calculate the state- and action-dependent noise in step()
    self.curr_state = reset_state[0]
    return reset_state
def seed(self, seed=None):
    """Initialise this environment's NumPy RNG through Gym's seeding utility.

    Parameters
    ----------
    seed : int
        Seed for the ``_np_random`` instance held by the environment. Cannot
        be numpy.int64 or similar because Gym doesn't accept it.

    Returns
    -------
    int
        The seed returned by Gym
    """
    # With seed=None, gymnasium.utils.seeding generates a random seed itself.
    rng, returned_seed = gym.utils.seeding.np_random(seed)  # random
    self._np_random = rng
    self.seed_ = returned_seed

    message_parts = (
        "Env SEED set to: ",
        str(seed),
        ". Returned seed from Gym: ",
        str(self.seed_),
    )
    print("".join(message_parts))
    return self.seed_
def get_transformed_image(self, env_img):
    """Embed ``env_img`` in a zero-padded canvas, optionally shifted at random.

    The canvas is ``self.image_padding`` pixels larger than the wrapped env's
    image on every side. When ``"shift"`` is in ``self.image_transforms``, the
    image is moved away from the centre by a random offset drawn from
    ``self._np_random`` and truncated to a multiple of
    ``self.image_sh_quant``; the offset range keeps the image inside the
    canvas.

    Parameters
    ----------
    env_img : array-like
        Image observation from the wrapped env, with the shape of
        ``self.env.observation_space`` (2-D, or 3-D with a trailing channel
        axis).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array holding the padded image with its first two axes
        swapped, as in the original Image-based implementation.

    Raises
    ------
    AssertionError
        If the wrapped env's images are not square.
    ValueError
        If the wrapped env's observation space is neither 2-D nor 3-D.
    """
    # ###TODO write tests
    # TODO "scale" and "rotate" transforms are not implemented here yet.
    obs_shape = self.env.observation_space.shape
    height = obs_shape[0]
    width = obs_shape[1]
    image_padding = self.image_padding
    tot_width = width + image_padding * 2
    tot_height = height + image_padding * 2
    assert height == width, "Currently only square images are supported."
    if len(obs_shape) not in (2, 3):
        raise ValueError(
            "Expected a 2-D or 3-D image observation space, got shape: "
            + str(obs_shape)
        )

    # Assume COM of env_img is in centre of whole image
    shift_w = tot_width // 2
    shift_h = tot_height // 2

    if "shift" in self.image_transforms:
        sh_quant = self.image_sh_quant
        max_shift_w = (tot_width - width) // 2
        max_shift_h = (tot_height - height) // 2
        # Without padding there is no room to shift, and the RNG call below
        # would be given an empty range.
        if max_shift_w > 0:
            add_shift_w = self._np_random.integers(
                -max_shift_w + 1, max_shift_w
            ).item()
            shift_w += int(add_shift_w / sh_quant) * sh_quant
        if max_shift_h > 0:
            add_shift_h = self._np_random.integers(
                -max_shift_h + 1, max_shift_h
            ).item()
            shift_h += int(add_shift_h / sh_quant) * sh_quant

    # Black uint8 canvas laid out as (rows, cols[, channels]). Building it
    # directly with NumPy works for any channel count, not only RGB.
    canvas = np.zeros(
        (tot_height, tot_width) + tuple(obs_shape[2:]), dtype=np.uint8
    )
    # Derive the far corner from the near corner plus the image size so that
    # odd image sizes are pasted in full as well.
    top = shift_h - height // 2
    left = shift_w - width // 2
    canvas[top : top + height, left : left + width] = env_img

    # Swap the row and column axes (any channel axis stays last), as the
    # original implementation did for its output.
    axes = (1, 0) + tuple(range(2, canvas.ndim))
    return np.transpose(canvas, axes=axes)
# return GymEnvWrapper
# from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper
# from gymnasium.envs.atari import AtariEnv
# from gymnasium.wrappers import AtariPreprocessing
# AtariPreprocessing()
# AtariEnvWrapper = get_gym_wrapper(AtariEnv)
# from ray.tune.registry import register_env
# register_env("AtariEnvWrapper", lambda config: AtariEnvWrapper(**config))
# aew = AtariEnvWrapper(**{'game': 'breakout', 'obs_type': 'image', 'frameskip': 4})
# ob = aew.reset()
# from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
# from gymnasium.envs.atari import AtariEnv
# ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1})
# aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0})
# ob = aew.reset()
# print(ob.shape)
# print(ob)
# total_reward = 0.0
# for i in range(200):
# act = aew.action_space.sample()
# next_state, reward, done, trunc, info = aew.step(act)
# print(reward, done, act)
# if reward > 10:
# print("reward in step:", i, reward)
# total_reward += reward
# print("total_reward:", total_reward)
# aew.reset()
# # AtariPreprocessing()
# # from ray.tune.registry import register_env
# # register_env("AtariEnvWrapper", lambda config: AtariEnvWrapper(**config))