diff --git a/rubiks_cube_gym/__init__.py b/rubiks_cube_gym/__init__.py index 0b41c9c..ebb3d18 100644 --- a/rubiks_cube_gym/__init__.py +++ b/rubiks_cube_gym/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium.envs.registration import register register( id='rubiks-cube-222-v0', @@ -40,4 +40,4 @@ id='skewb-sarah-v1', entry_point='rubiks_cube_gym.envs:SkewbEnvSarah', max_episode_steps=250, -) \ No newline at end of file +) diff --git a/rubiks_cube_gym/envs/pyraminx_wo_tips.py b/rubiks_cube_gym/envs/pyraminx_wo_tips.py index c189a26..c4192b3 100644 --- a/rubiks_cube_gym/envs/pyraminx_wo_tips.py +++ b/rubiks_cube_gym/envs/pyraminx_wo_tips.py @@ -1,7 +1,6 @@ import pickle -import random -import gym -from gym import spaces +import gymnasium as gym +from gymnasium import spaces import os import cv2 import numpy as np @@ -10,7 +9,7 @@ class PyraminxWoTipsEnv(gym.Env): - metadata = {'render.modes': ['human', 'rgb_array', 'ansi']} + metadata = {'render_modes': ['human', 'rgb_array', 'ansi']} def __init__(self): self.cube = None @@ -44,10 +43,10 @@ def generate_scramble(self): layer_move_types = ['', "'"] while scramble_len < 11: - move = random.choice(layer_moves) + move = self.np_random.choice(layer_moves) while move == prev_move: - move = random.choice(layer_moves) - scramble += move + random.choice(layer_move_types) + " " + move = self.np_random.choice(layer_moves) + scramble += move + self.np_random.choice(layer_move_types) + " " prev_move = move scramble_len += 1 @@ -101,7 +100,7 @@ def step(self, action): observation = self.cube_state info = {"cube": self.cube, "cube_reduced": self.cube_reduced} - return observation, reward, done, info + return observation, reward, done, False, info def reward(self): if self.cube_reduced == "RRRRRGBBBBBRRRGGGBBBRGGGGGBYYYYYYYYY": @@ -109,9 +108,12 @@ def reward(self): else: return -1, False - def reset(self, scramble=None): + def reset(self, *, seed=None, options=None): + super().reset(seed=seed) + self.cube = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], dtype=np.uint8) + scramble = None if options is None else options.get("scramble") if scramble: self.algorithm(scramble) elif scramble == False: @@ -122,7 +124,9 @@ def reset(self, scramble=None): self.update_cube_reduced() self.update_cube_state() - return self.cube_state + info = {"cube": self.cube, "cube_reduced": self.cube_reduced} + + return self.cube_state, info def render(self, mode='human', render_time=100): if mode == 'ansi': diff --git a/rubiks_cube_gym/envs/pyraminx_wo_tips_lbl.py b/rubiks_cube_gym/envs/pyraminx_wo_tips_lbl.py index 14a258d..7492319 100644 --- a/rubiks_cube_gym/envs/pyraminx_wo_tips_lbl.py +++ b/rubiks_cube_gym/envs/pyraminx_wo_tips_lbl.py @@ -37,11 +37,11 @@ def reward(self): return reward, done - def reset(self, scramble=None): - super(PyraminxWoTipsEnvLBL, self).reset(scramble=scramble) + def reset(self, *, seed=None, options=None): + obs, info = super().reset(seed=seed, options=options) self.FL = self.check_FL() - return self.cube_state + return obs, info FL_POS = [[0, 1, 2, 3, 4, 11, 12, 13, 20, 5, 14, 15, 20, 21, 6, 7, 8, 9, 10, 27, 28, 32, 33, 35], diff --git a/rubiks_cube_gym/envs/rubiks_cube_222.py b/rubiks_cube_gym/envs/rubiks_cube_222.py index 020ae72..425b8a7 100644 --- a/rubiks_cube_gym/envs/rubiks_cube_222.py +++ b/rubiks_cube_gym/envs/rubiks_cube_222.py @@ -1,7 +1,6 @@ import pickle -import random -import gym -from gym import spaces +import gymnasium as gym +from gymnasium import spaces import os import cv2 import numpy as np @@ -10,7 +9,7 @@ class RubiksCube222Env(gym.Env): - metadata = {'render.modes': ['human', 'rgb_array', 'ansi']} + metadata = {'render_modes': ['human', 'rgb_array', 'ansi']} def __init__(self): self.cube = None @@ -44,10 +43,10 @@ def generate_scramble(self): move_type = ['', '2', "'"] while scramble_len < 11: - move = random.choice(moves) + move = self.np_random.choice(moves) while move == prev_move: - move = random.choice(moves) - scramble += move + random.choice(move_type) + " " + move = self.np_random.choice(moves) + scramble += move + self.np_random.choice(move_type) + " " prev_move = move scramble_len += 1 @@ -100,7 +99,7 @@ def step(self, action): observation = self.cube_state info = {"cube": self.cube, "cube_reduced": self.cube_reduced} - return observation, reward, done, info + return observation, reward, done, False, info def reward(self): if self.cube_reduced == "WWWWOOGGRRBBOOGGRRBBYYYY": @@ -108,9 +107,12 @@ def reward(self): else: return -1, False - def reset(self, scramble=None): + def reset(self, *, seed=None, options=None): + super().reset(seed=seed) + self.cube = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], dtype=np.uint8) + scramble = None if options is None else options.get("scramble") if scramble: self.algorithm(scramble) elif scramble == False: @@ -121,7 +123,9 @@ def reset(self, scramble=None): self.update_cube_reduced() self.update_cube_state() - return self.cube_state + info = {"cube": self.cube, "cube_reduced": self.cube_reduced} + + return self.cube_state, info def render(self, mode='human', render_time=100): if mode == 'ansi': diff --git a/rubiks_cube_gym/envs/rubiks_cube_222_lbl.py b/rubiks_cube_gym/envs/rubiks_cube_222_lbl.py index be1d0a3..9cd4908 100644 --- a/rubiks_cube_gym/envs/rubiks_cube_222_lbl.py +++ b/rubiks_cube_gym/envs/rubiks_cube_222_lbl.py @@ -50,12 +50,12 @@ def reward(self): return reward, done - def reset(self, scramble=None): - super(RubiksCube222EnvLBL, self).reset(scramble=scramble) + def reset(self, *, seed=None, options=None): + obs, info = super().reset(seed=seed, options=options) self.FL = self.check_FL() self.OLL = self.check_OLL() - return self.cube_state + return obs, info FL_POS = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12], [0, 2, 4, 5, 6, 11, 12, 13, 14, 19, 20, 22], diff --git a/rubiks_cube_gym/envs/rubiks_cube_222_ortega.py b/rubiks_cube_gym/envs/rubiks_cube_222_ortega.py index 78efb9b..6b81988 100644 --- a/rubiks_cube_gym/envs/rubiks_cube_222_ortega.py +++ b/rubiks_cube_gym/envs/rubiks_cube_222_ortega.py @@ -50,12 +50,12 @@ def reward(self): return reward, done - def reset(self, scramble=None): - super(RubiksCube222EnvOrtega, self).reset(scramble=scramble) + def reset(self, *, seed=None, options=None): + obs, info = super().reset(seed=seed, options=options) self.FF = self.check_FF() self.OLL = self.check_OLL() - return self.cube_state + return obs, info FF_POS = [[0, 1, 2, 3], [4, 5, 12, 13], [6, 7, 14, 15], [8, 9, 16, 17], [10, 11, 18, 19], [20, 21, 22, 23]] diff --git a/rubiks_cube_gym/envs/skewb.py b/rubiks_cube_gym/envs/skewb.py index 4f241b8..ca265bb 100644 --- a/rubiks_cube_gym/envs/skewb.py +++ b/rubiks_cube_gym/envs/skewb.py @@ -1,7 +1,6 @@ import pickle -import random -import gym -from gym import spaces +import gymnasium as gym +from gymnasium import spaces import os import cv2 import numpy as np @@ -10,7 +9,7 @@ class SkewbEnv(gym.Env): - metadata = {'render.modes': ['human', 'rgb_array', 'ansi']} + metadata = {'render_modes': ['human', 'rgb_array', 'ansi']} def __init__(self): self.cube = None @@ -44,10 +43,10 @@ def generate_scramble(self): layer_move_types = ['', "'"] while scramble_len < 11: - move = random.choice(layer_moves) + move = self.np_random.choice(layer_moves) while move == prev_move: - move = random.choice(layer_moves) - scramble += move + random.choice(layer_move_types) + " " + move = self.np_random.choice(layer_moves) + scramble += move + self.np_random.choice(layer_move_types) + " " prev_move = move scramble_len += 1 @@ -102,7 +101,7 @@ def step(self, action): observation = self.cube_state info = {"cube": self.cube, "cube_reduced": self.cube_reduced} - return observation, reward, done, info + return observation, reward, done, False, info def reward(self): if self.cube_reduced == "WWWWWOOOOOGGGGGRRRRRBBBBBYYYYY": @@ -110,9 +109,12 @@ def reward(self): else: return -1, False - def reset(self, scramble=None): + def reset(self, *, seed=None, options=None): + super().reset(seed=seed) + self.cube = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], dtype=np.uint8) + scramble = None if options is None else options.get("scramble") if scramble: self.algorithm(scramble) elif scramble == False: @@ -123,7 +125,9 @@ def reset(self, scramble=None): self.update_cube_reduced() self.update_cube_state() - return self.cube_state + info = {"cube": self.cube, "cube_reduced": self.cube_reduced} + + return self.cube_state, info def render(self, mode='human', render_time=100): if mode == 'ansi': diff --git a/rubiks_cube_gym/envs/skewb_sarah.py b/rubiks_cube_gym/envs/skewb_sarah.py index 3eecf3f..6bbeac2 100644 --- a/rubiks_cube_gym/envs/skewb_sarah.py +++ b/rubiks_cube_gym/envs/skewb_sarah.py @@ -37,11 +37,11 @@ def reward(self): return reward, done - def reset(self, scramble=None): - super(SkewbEnvSarah, self).reset(scramble=scramble) + def reset(self, *, seed=None, options=None): + obs, info = super().reset(seed=seed, options=options) self.FL = self.check_FL() - return self.cube_state + return obs, info FL_POS = [[0, 1, 2, 3, 4, 5, 6, 10, 11, 15, 16, 20, 21], [5, 6, 7, 8, 9, 0, 3, 10, 13, 25, 28, 21, 24], diff --git a/setup.py b/setup.py index ebcf7f5..234fc18 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ long_description = f.read() setup(name='rubiks_cube_gym', - version='0.4.0', + version='0.5.0', url="https://github.com/DoubleGremlin181/RubiksCubeGym/", description="OpenAI Gym environments for various twisty puzzles", long_description=long_description, @@ -13,6 +13,6 @@ author="Kavish Hukmani", author_email="khukmani@gmail.com", license="MIT", - install_requires=['gym', 'numpy', 'opencv-python', 'wget'], + install_requires=['gymnasium==0.29.1', 'numpy', 'opencv-python', 'wget'], packages=find_packages() )