diff --git a/airplane/__init__.py b/airplane/__init__.py new file mode 100644 index 0000000..7438356 --- /dev/null +++ b/airplane/__init__.py @@ -0,0 +1,8 @@ +from gymnasium.envs.registration import register + +# Register the first custom environment +register( + id='ReducedSymmetricGliderPullout-v0', + entry_point='airplane.reduced_symmetric_glider_pullout:ReducedSymmetricGliderPullout', + max_episode_steps=100, +) \ No newline at end of file diff --git a/airplane/airplane_env.py b/airplane/airplane_env.py new file mode 100644 index 0000000..3781343 --- /dev/null +++ b/airplane/airplane_env.py @@ -0,0 +1,47 @@ +import numpy as np +from gymnasium import Env + +class AirplaneEnv(Env): + metadata = {"render_modes": ["human", "ascii", "ansi"], "render_fps": 60} + # TODO(gtorre): use render fps + + def __init__(self, airplane, render_mode=None): + self.airplane = airplane + + self.visualiser = None + self.render_mode = render_mode + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.window = None + self.clock = None + + def seed(self, seed=None): + np.random.seed(seed) + + def render(self, mode: str | None = "ascii"): + """Renders the environment. + :param mode: str, the mode to render with: + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. The text can include newlines + and ANSI escape sequences (e.g. for colors). + """ + if mode == "human": + pass + #if not self.visualiser: + #self.visualiser = Visualizer(self.airplane) + #self.visualiser.plot() + + else: # ANSI or ASCII + # TODO: Add additional observations + print( + f"\u001b[34m Flight Path Angle (deg): {np.rad2deg(self.airplane.flight_path_angle):.2f}\u001b[37m" + ) + # TODO: Proper stall prediction + if self.airplane.flight_path_angle > 0.7: + print("\u001b[35m -- STALL --\u001b[37m") + + def close(self): + if self.window is not None: + # close + raise NotImplementedError \ No newline at end of file diff --git a/airplane/grumman.py b/airplane/grumman.py new file mode 100644 index 0000000..e451825 --- /dev/null +++ b/airplane/grumman.py @@ -0,0 +1,131 @@ +import numpy as np + + +class Grumman: + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Base class for airplane parameters""" + + def __init__(self): + ###################### + ### Sim parameters ### + ###################### + self.TIME_STEP = 0.01 + self.GRAVITY = 9.81 + self.AIR_DENSITY = 1.225 # Density (ρ) [kg/m3] + + ########################### + ### Airplane parameters ### + ########################### + # Aerodynamic model: CL coefficients + self.CL_0 = 0.41 + self.CL_ALPHA = 4.6983 + self.CL_ELEVATOR = 0.361 + self.CL_QHAT = 2.42 + # Aerodynamic model: CD coefficients + self.CD_0 = 0.0525 + self.CD_ALPHA = 0.2068 + self.CD_ALPHA2 = 1.8712 + # Aerodynamic model: Cm coefficients + self.CM_0 = 0.076 + self.CM_ALPHA = -0.8938 + self.CM_ELEVATOR = -1.0313 + self.CM_QHAT = -7.15 + # Aerodynamic model: Cl coefficients + self.Cl_BETA = -0.1089 + self.Cl_PHAT = -0.52 + self.Cl_RHAT = 0.19 + self.Cl_AILERON = -0.1031 + self.Cl_RUDDER = 0.0143 + # Physical model + self.MASS = 697.18 # Mass (m) [kg] + self.WING_SURFACE_AREA = 9.1147 # Wing surface area (S) [m2] + self.CHORD = 1.22 # Chord (c) [m] + self.WING_SPAN = 7.46 # Wing Span (b) [m] + self.I_XX = 808.06 # Inertia [Kg.m^2] + self.I_YY = 1011.43 # Inertia [Kg.m^2] + 
self.ALPHA_STALL = np.deg2rad(15) # Stall angle of attack (αs) [rad] + self.ALPHA_NEGATIVE_STALL = np.deg2rad(-7) # Negative stall angle of attack (αs) [rad] + self.CL_STALL = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + self.CL_REF = self.CL_STALL + # self.STALL_AIRSPEED = 32.19 # Stall air speed (Vs) [m/s] + self.STALL_AIRSPEED = np.sqrt(self.MASS * self.GRAVITY / (0.5 * self.AIR_DENSITY * \ + self.WING_SURFACE_AREA * self.CL_REF)) # Stall air speed (Vs) [m/s] + self.MAX_CRUISE_AIRSPEED = 2 * self.STALL_AIRSPEED # Maximum air speed (Vs) [m/s] + + # Throttle model + self.THROTTLE_LINEAR_MAPPING = None + self._initialize_throttle_model() + + def _update_state_from_derivative(self, value_to_update, value_derivative): + value_to_update += self.TIME_STEP * value_derivative + return value_to_update + + def _alpha_from_cl(self, c_lift): + alpha = (c_lift - self.CL_0) / self.CL_ALPHA + return alpha + + def _cl_from_lift_force_and_speed(self, lift_force, airspeed): + cl = 2 * lift_force / (self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2) + return cl + + def _cl_from_alpha(self, alpha, elevator, q_hat): + # TODO: review model + if alpha <= self.ALPHA_NEGATIVE_STALL: + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_NEGATIVE_STALL + elif alpha >= self.ALPHA_STALL: + # Stall model: Lift saturation + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + # Stall model: Lift reduction with opposite slope + # c_lift = - self.CL_ALPHA * alpha + self.CL_0 + 2 * self.CL_ALPHA * self.ALPHA_STALL + else: + c_lift = self.CL_0 + self.CL_ALPHA * alpha + self.CL_ELEVATOR * elevator + self.CL_QHAT * q_hat + return c_lift + + def _lift_force_at_speed_and_cl(self, airspeed, lift_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * lift_coefficient + + def _cd_from_alpha(self, alpha): + c_drag = self.CD_0 + self.CD_ALPHA * alpha + self.CD_ALPHA2 * (alpha ** 2) + return c_drag + + def _cd_from_cl(self, c_lift): + c_drag = self._cd_from_alpha(self._alpha_from_cl(c_lift)) + return c_drag + + def _drag_force_at_speed_and_cd(self, airspeed, drag_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * drag_coefficient + + def _drag_force_at_cruise_speed(self, airspeed): + cruise_lift_force = self.MASS * self.GRAVITY + cruise_cl = self._cl_from_lift_force_and_speed(cruise_lift_force, airspeed) + alpha = self._alpha_from_cl(cruise_cl) + cruise_cd = self._cd_from_alpha(alpha) + drag_force = self._drag_force_at_speed_and_cd(airspeed, cruise_cd) + return drag_force + + def _rolling_moment_coefficient(self, beta, p_hat, r_hat, aileron, rudder): + c_rolling_moment = self.Cl_BETA * beta + self.Cl_PHAT * p_hat + self.Cl_RHAT * r_hat + \ + self.Cl_AILERON * aileron + self.Cl_RUDDER * rudder + return c_rolling_moment + + def _rolling_moment_at_speed_and_cl(self, airspeed, rolling_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.WING_SPAN * airspeed ** 2 * rolling_moment_coefficient + + def _pitching_moment_coefficient(self, alpha, elevator, q_hat): + c_pitch_moment = self.CM_0 + self.CM_ALPHA * alpha + self.CM_ELEVATOR * elevator + self.CM_QHAT * q_hat + return c_pitch_moment + + def _pitching_moment_at_speed_and_cm(self, airspeed, pitching_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.CHORD * airspeed ** 2 * pitching_moment_coefficient + + def _initialize_throttle_model(self, ): + # Throttle model: Thrust force = Kt * δ_throttle + # Max Thrust -> Kt * 1 = Drag(V=Vmax) -> Kt = 
0.5 ρ S (Vmax)^2 CD + # δ_throttle = 1.0 -> Max Cruise speed: V' = Vmax -> V_dot = 0 = Thrust Force - Drag Force + self.THROTTLE_LINEAR_MAPPING = self._drag_force_at_cruise_speed(self.MAX_CRUISE_AIRSPEED) + + def _thrust_force_at_throttle(self, throttle): + thrust_force = self.THROTTLE_LINEAR_MAPPING * throttle + return thrust_force \ No newline at end of file diff --git a/airplane/reduced_banked_glider_pullout.py b/airplane/reduced_banked_glider_pullout.py new file mode 100644 index 0000000..42e7718 --- /dev/null +++ b/airplane/reduced_banked_glider_pullout.py @@ -0,0 +1,57 @@ +import numpy as np +import gymnasium +from gymnasium import spaces +from matplotlib import pyplot as plt +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + + +class ReducedBankedGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + # Observation space: Flight Path Angle (γ), Air Speed (V), Bank Angle (μ) + self.observation_space = spaces.Box(np.array([-np.pi, 0.9, np.deg2rad(-20)], np.float32), np.array([0, 4.0, np.deg2rad(200)], np.float32), shape=(3,), dtype=np.float32) + # Action space: Lift Coefficient (CL), Bank Rate (μ') + self.action_space = spaces.Box(np.array([-0.5, np.deg2rad(-30)], np.float32), np.array([1.0, np.deg2rad(30)], np.float32), shape=(2,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm, self.airplane.bank_angle], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm, bank_angle] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, airspeed_norm, bank_angle) + + observation = self._get_obs(), {} + + return observation + + def step(self, action: list): + # Update state + action = np.clip(action, self.action_space.low, self.action_space.high) + c_lift = action[0] + bank_rate = action[1] + + self.airplane.command_airplane(c_lift, bank_rate, 0) + + # Calculate step reward: Height Loss + # TODO: Analyze policy performance based on reward implementation. + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle) + # reward = self.TIME_STEP * (self.airspeed_norm * self.STALL_AIRSPEED) * np.sin(self.flight_path_angle) - 0.01 * bank_rate ** 2 + terminated = self.termination() + observation = self._get_obs() + info = self._get_info() + return observation, reward, terminated, False, info + + + def termination(self,): + terminate = np.where((self.airplane.flight_path_angle >= 0.0) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate \ No newline at end of file diff --git a/airplane/reduced_grumman.py b/airplane/reduced_grumman.py new file mode 100644 index 0000000..365efdc --- /dev/null +++ b/airplane/reduced_grumman.py @@ -0,0 +1,56 @@ +import numpy as np +from airplane.grumman import Grumman + +class ReducedGrumman(Grumman): + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Class for simplified airplane state and dynamics""" + + # NOTE: Commands as separate objects? e.g. bank.rotate(airplane), + # throttle.accelerate(airplane), etc. + # NOTE: Use of α instead of Cl?
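# A minimal, self-contained sketch of the reduced point-mass dynamics that
# ReducedGrumman.command_airplane integrates below with a forward-Euler step,
# using the Grumman constants from grumman.py. The helper name `euler_step`,
# its signature, and the example call are illustrative assumptions, not project
# code. The stall speed Vs = sqrt(m g / (0.5 ρ S CL_ref)) evaluates to ≈ 27.33 m/s
# with these constants, which is the same value used as STALL_AIRSPEED later on.
import numpy as np

RHO, S, M, G, DT = 1.225, 9.1147, 697.18, 9.81, 0.01  # air density, wing area, mass, gravity, TIME_STEP
VS = 27.33                                            # stall airspeed [m/s] from the formula above

def euler_step(gamma, v_norm, mu, c_lift, c_drag, thrust=0.0):
    """One Euler step of V' = -g sin(γ) - ρ S V² CD / (2 m) + T / m and
    γ' = ρ S V CL cos(µ) / (2 m) - (g / V) cos(γ), with V = v_norm * VS."""
    v = v_norm * VS
    v_dot = -G * np.sin(gamma) - 0.5 * RHO * S * v ** 2 * c_drag / M + thrust / M
    gamma_dot = 0.5 * RHO * S * v * c_lift * np.cos(mu) / M - (G / v) * np.cos(gamma)
    return gamma + DT * gamma_dot, (v + DT * v_dot) / VS

# e.g. a wings-level pullout step from γ = -45°, V = 1.5 Vs with CL = 1.0, CD ≈ 0.11:
# euler_step(np.deg2rad(-45), 1.5, 0.0, 1.0, 0.11)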
+ + def __init__(self): + super().__init__() + ########################## + ### Airplane variables ### + ########################## + self.flight_path_angle = np.zeros(10000, dtype=np.float32) # Flight Path Angle (γ) [rad] + self.airspeed_norm = np.ones_like(self.flight_path_angle, dtype=np.float32) # Air Speed (V/Vs) [1] + self.bank_angle = 0.0 # Bank Angle (μ) [rad] + # previous commands + self.last_c_lift = 0.0 + self.last_bank_rate = 0.0 + self.last_throttle = 0.0 + + def command_airplane(self, c_lift, bank_rate, delta_throttle): + self.last_c_lift = c_lift + self.last_bank_rate = bank_rate + self.last_throttle = delta_throttle + + c_drag = self._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.GRAVITY * np.sin(self.flight_path_angle) - 0.5 * self.AIR_DENSITY * ( + self.WING_SURFACE_AREA / self.MASS) * (self.airspeed_norm * self.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.THROTTLE_LINEAR_MAPPING * delta_throttle / self.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.AIR_DENSITY * (self.WING_SURFACE_AREA / self.MASS) * ( + self.airspeed_norm * self.STALL_AIRSPEED) * c_lift * np.cos(self.bank_angle) \ + - (self.GRAVITY / (self.airspeed_norm * self.STALL_AIRSPEED)) * np.cos( + self.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + + self.airspeed_norm = self._update_state_from_derivative(self.airspeed_norm, airspeed_dot / self.STALL_AIRSPEED) + self.flight_path_angle = self._update_state_from_derivative(self.flight_path_angle, flight_path_angle_dot) + self.bank_angle = self._update_state_from_derivative(self.bank_angle, bank_angle_dot) + + def reset(self, flight_path_angle, airspeed_norm, bank_angle): + self.flight_path_angle = flight_path_angle + self.airspeed_norm = airspeed_norm + self.bank_angle = bank_angle \ No newline at end of file diff --git a/airplane/reduced_symmetric_glider_pullout.py b/airplane/reduced_symmetric_glider_pullout.py new file mode 100644 index 0000000..b189229 --- /dev/null +++ b/airplane/reduced_symmetric_glider_pullout.py @@ -0,0 +1,92 @@ +import numpy as np +from gymnasium import spaces + +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + +try: + import cupy as xp + if not xp.cuda.is_available(): + raise ImportError("CUDA is not available. 
Falling back to NumPy.") +except (ImportError, AttributeError): + xp = np + +class ReducedSymmetricGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + # Observation space: Flight Path Angle (γ), Air Speed (V) + self.observation_space = spaces.Box(np.array([-np.pi, 0.6], np.float32), + np.array([0, 4.0], np.float32), shape=(2,), dtype=np.float32) + # Action space: Lift Coefficient + self.action_space = spaces.Box(-0.5, 1.0, shape=(1,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, airspeed_norm, 0) + + observation = self._get_obs() + # clip the observation to the observation space + observation = np.clip(observation, self.observation_space.low, self.observation_space.high).flatten() + assert self.observation_space.contains(observation), "Observation is not within the observation space!" + return observation, {} + + def step(self, action: list): + # Update state + c_lift = action #action[0] + #self.airplane.command_airplane(c_lift, 0, 0) + + delta_throttle = 0 + bank_rate = 0 + + init_terminal = self.termination() + + self.airplane.last_c_lift = c_lift + self.airplane.last_bank_rate = bank_rate + self.airplane.last_throttle = delta_throttle + + c_drag = self.airplane._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.airplane.GRAVITY * np.sin(self.airplane.flight_path_angle) - 0.5 * self.airplane.AIR_DENSITY * ( + self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.airplane.THROTTLE_LINEAR_MAPPING * delta_throttle / self.airplane.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.airplane.AIR_DENSITY * (self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * ( + self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) * c_lift * np.cos(self.airplane.bank_angle) \ + - (self.airplane.GRAVITY / (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED)) * np.cos( + self.airplane.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + self.airplane.airspeed_norm += self.airplane.TIME_STEP * (airspeed_dot / self.airplane.STALL_AIRSPEED) + self.airplane.flight_path_angle += self.airplane.TIME_STEP * flight_path_angle_dot + #clip the state to the observation space + self.airplane.airspeed_norm = np.clip(self.airplane.airspeed_norm, self.observation_space.low[1], self.observation_space.high[1]) + self.airplane.flight_path_angle = np.clip(self.airplane.flight_path_angle, self.observation_space.low[0], self.observation_space.high[0]) + # Calculate step reward: Height Loss + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle)*27.331231856346 + + # Get the next state + info = self._get_info() + terminated = self.termination() | init_terminal + reward = np.where(init_terminal, 0, reward) + + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T, reward, terminated, False, info + + + def termination(self,): + 
terminate = np.where((self.airplane.flight_path_angle >= 0.0) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate diff --git a/src/PolicyIteration.py b/src/PolicyIteration.py index a9dc9f1..86bd127 100644 --- a/src/PolicyIteration.py +++ b/src/PolicyIteration.py @@ -1,12 +1,11 @@ import os import pickle +import numpy as np import gymnasium as gym from loguru import logger from scipy.spatial import Delaunay from utils.utils import plot_3D_value_function - -import numpy as np try: import cupy as cp @@ -15,8 +14,7 @@ logger.info("CUDA driver is available.") except (ImportError, AttributeError): - - import numpy as cp + cp = np logger.warning("CUDA is not available. Falling back to NumPy.") def asarray(arr, *args, **kwargs): """In NumPy, this just ensures the object is a NumPy array, with support for additional arguments.""" @@ -28,7 +26,7 @@ def asnumpy(arr, *args, **kwargs): np.asarray = asarray np.asnumpy = asnumpy - cp = np + class PolicyIteration(object): @@ -124,6 +122,12 @@ def __init__(self, env: gym.Env, self.grid = np.meshgrid(*self.bins_space.values(), indexing='ij') # Flatten and stack to create a list of points in the space self.states_space = np.vstack([g.ravel() for g in self.grid], dtype=np.float32).T + + # get x and y coordinates + x = self.states_space[:,0] + y = self.states_space[:,1] + self.terminal_states = np.where((x >= 0.0) & (y >= 1) , True, False) + self.terminal_reward = 0 # self.num_simplex_points:int = int(self.states_space[0].shape[0] + 1) # number of points in a simplex one more than the dimension self.space_dim:int = int(self.states_space[0].shape[0]) @@ -160,6 +164,7 @@ def __in_cell__(self, obs: cp.ndarray) -> cp.ndarray: Returns: np.ndarray: A boolean array indicating whether each observation is within the valid state bounds. """ + #return cp.all((obs >= self.cell_lower_bounds[:, None]) & (obs <= self.cell_upper_bounds[:, None]), axis=1) return cp.all((obs >= self.cell_lower_bounds) & (obs <= self.cell_upper_bounds), axis=1) def barycentric_coordinates(self, points:np.ndarray)->tuple: @@ -224,14 +229,22 @@ def calculate_transition_reward_table(self): "points_indexes": The indexes of the points in the simplex. """ for j, action in enumerate(self.action_space): - self.env.state = cp.asarray(self.states_space, dtype=cp.float32) - obs_gpu, reward_gpu, _, _, _ = self.env.step(action) + self.env.reset() + #self.env.state = cp.asarray(self.states_space, dtype=cp.float32) + state = np.array(self.states_space, dtype=np.float32) + self.env.airplane.flight_path_angle = state[:,0].copy() + self.env.airplane.airspeed_norm = state[:,1].copy() +# self.env.airplane.bank_angle = state[:,2].copy() + + obs_gpu, reward_gpu, terminated, _, _ = self.env.step(action) + # log if any state is outside the bounds of the environment states_outside_gpu = self.__in_cell__(obs_gpu) if bool(cp.any(~states_outside_gpu)): # get the indexes of the states outside the bounds - reward_gpu = cp.where(states_outside_gpu, reward_gpu, -100) logger.warning(f"Some states are outside the bounds of the environment.") + # if the state is terminal, set the reward to zero + reward_gpu = cp.where(terminated, 0, reward_gpu) # if any state is outside the bounds of the environment clip it to the bounds obs_gpu = cp.clip(obs_gpu, self.cell_lower_bounds, self.cell_upper_bounds) # get the barycentric coordinates of the resulting state in CPU for now. 
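# A minimal sketch of the simplex interpolation these hunks rely on (barycentric_coordinates /
# get_value): each next state is located in the Delaunay triangulation, its barycentric weights
# λ are recovered from the simplex's affine transform, and the interpolated value is Σ λ_i V(v_i)
# ('Variable Resolution Discretization in Optimal Control', eq. 5). The function below is an
# illustrative NumPy/SciPy stand-in under those assumptions, not the class's actual method.
import numpy as np
from scipy.spatial import Delaunay

def interpolate_value(tri: Delaunay, values: np.ndarray, points: np.ndarray) -> np.ndarray:
    ndim = points.shape[1]
    simplex = tri.find_simplex(points)          # containing simplex per point (-1 if outside the hull,
                                                # which is why observations are clipped to the cell bounds first)
    T = tri.transform[simplex, :ndim]           # inverse affine map of each simplex
    r = points - tri.transform[simplex, ndim]   # offset from the simplex's reference vertex
    bary = np.einsum('ijk,ik->ij', T, r)        # first ndim barycentric coordinates
    lambdas = np.c_[bary, 1.0 - bary.sum(axis=1)]   # weights sum to one
    vertices = tri.simplices[simplex]           # state-space indexes of the simplex vertices
    return np.einsum('ij,ij->i', lambdas, values[vertices])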
@@ -244,6 +257,7 @@ def calculate_transition_reward_table(self): self.lambdas[:,j] = cp.asarray(lambdas, dtype=cp.float32) self.simplexes[:,j] = cp.asarray(simplexes, dtype=cp.float32) self.points_indexes[:,j] = cp.asarray(points_indexes, dtype=cp.int32) + def get_value(self, lambdas:cp.ndarray, point_indexes:cp.ndarray, value_function:cp.ndarray)->cp.ndarray: @@ -280,12 +294,17 @@ def policy_evaluation(self): logger.info("Starting policy evaluation") while cp.abs(float(max_error)) > self.theta: # initialize the new value function to zeros - new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + vf_next_state = cp.zeros_like(self.value_function, dtype=cp.float32) new_val = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function[self.terminal_states] = self.terminal_reward + new_val[self.terminal_states] = self.terminal_reward + for j, _ in enumerate(self.action_space): # Checkout 'Variable Resolution Discretization in Optimal Control, eq 5' - next_state_value = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function) - new_val += self.policy[:,j] * (self.reward[:,j] + self.gamma * next_state_value) + vf_next_state[~self.terminal_states] = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function)[~self.terminal_states] + new_val[~self.terminal_states] += self.policy[~self.terminal_states,j] * (self.reward[~self.terminal_states,j] + self.gamma * vf_next_state[~self.terminal_states]) + new_value_function = new_val # update the error: the maximum difference between the new and old value functions errors = cp.fabs(new_value_function[:] - self.value_function[:]) diff --git a/src/utils/utils.py b/src/utils/utils.py index a378ed9..357383d 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -11,18 +11,24 @@ def plot_3D_value_function(vf: np.array, """ Plots a 3D value function in some color scale.""" # Assuming points is a 2D array where each row is a point [position, velocity] - X = points[:, 0] # x-axis (position) - Y = points[:, 1] # y-axis (velocity) + X = points[:, 0] # x-axis + # transformb X from rad to deg + X = np.rad2deg(X) + Y = points[:, 1] # y-axis vf = vf # z-axis (value function) vf_to_plot = (vf - vf.min()) / (vf.max() - vf.min()) if normalize else vf fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Use plot_trisurf for unstructured triangular surface plot surf = ax.plot_trisurf(X, Y, vf_to_plot, cmap=cmap, edgecolor='white', linewidth=0.2) + # Add title + ax.set_title('Reduced Symmetric Glider Value Function ', pad=20) # Add labels - ax.set_xlabel('position') - ax.set_ylabel('velocity') - ax.set_zlabel('Normalized Value Function') + ax.set_xlabel('Flight Path Angle (γ) [deg]', labelpad=10) + ax.set_ylabel('V/Vs', labelpad=10) + ax.set_zlabel('Normalized Value Function', labelpad=10) + ax.set_xticks(np.round(np.linspace(min(X), max(X), 5))) # 5 ticks on the x-axis + ax.set_yticks(np.round(np.linspace(min(Y), max(Y), 5))) # 5 ticks on the y-axis # Add color bar to represent the value range if path is not None: plt.savefig(path) # Show the plot @@ -70,7 +76,7 @@ def get_optimal_action(state:np.array, optimal_policy:np.array): Returns: action: The optimal action for the given state. 
""" - lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy, state) + lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy,state) simplex, points_indexes = simmplex_info actions = optimal_policy.action_space probabilities = np.zeros(len(actions), dtype=np.float32) diff --git a/test.py b/test.py index 8a5dd1a..8aa5879 100644 --- a/test.py +++ b/test.py @@ -1,30 +1,33 @@ import pickle +import airplane import numpy as np +import gymnasium as gym from utils.utils import plot_3D_value_function from PolicyIteration import PolicyIteration -from classic_control.continuous_mountain_car import Continuous_MountainCarEnv -env=Continuous_MountainCarEnv() +glider = gym.make('ReducedSymmetricGliderPullout-v0') bins_space = { - "x_space": np.linspace(env.min_position, env.max_position, 100, dtype=np.float32), # position space (0) - "x_dot_space": np.linspace(-abs(env.max_speed), abs(env.max_speed), 100, dtype=np.float32), # velocity space (1) + "flight_path_angle": np.linspace(-np.pi-0.01, 0.10, 100, dtype=np.float32), # Flight Path Angle (γ) (0) + "airspeed_norm": np.linspace(0.7, 4, 100, dtype=np.float32), # Air Speed (V) (1) } pi = PolicyIteration( - env=env, + env=glider, bins_space=bins_space, - action_space=np.linspace(-1.0, +1.0,9, dtype=np.float32), + action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32), gamma=0.99, theta=1e-3, ) -pi.run() -with open(env.__class__.__name__ + ".pkl", "rb") as f: + +#pi.run() + +with open(glider.__class__.__name__ + ".pkl", "rb") as f: pi: PolicyIteration = pickle.load(f) plot_3D_value_function(vf = pi.value_function, points = pi.states_space, - normalize=True, + normalize=False, show=True, path="./test_vf.png") \ No newline at end of file diff --git a/testing.ipynb b/testing.ipynb index b5a7f7d..35accb4 100644 --- a/testing.ipynb +++ b/testing.ipynb @@ -13,17 +13,398 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "002626e9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-18 16:15:26.697\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[33m\u001b[1mCUDA is not available. Falling back to NumPy.\u001b[0m\n" + ] + } + ], "source": [ "import pickle\n", "import numpy as np\n", + "import gymnasium as gym\n", "from utils.utils import test_enviroment\n", "from PolicyIteration import PolicyIteration" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "95b5a85a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-18 16:15:28.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m119\u001b[0m - \u001b[1mLower bounds: [-3.1515927 0.7 ]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mUpper bounds: [0.5 4. 
]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mThe action space is: [-4.000000e-01 -3.000000e-01 -2.000000e-01 -1.000000e-01 -5.551115e-17\n", + " 1.000000e-01 2.000000e-01 3.000000e-01 4.000000e-01 5.000000e-01\n", + " 6.000000e-01 7.000000e-01 8.000000e-01 9.000000e-01 1.000000e+00]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m139\u001b[0m - \u001b[1mNumber of states: 10000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mTotal states:150000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mPolicy Iteration was correctly initialized.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m154\u001b[0m - \u001b[1mThe enviroment name is: TimeLimit\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m366\u001b[0m - \u001b[1mCreating Delaunay triangulation over the state space...\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m368\u001b[0m - \u001b[1mDelaunay triangulation created.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mGenerating transition and reward function table...\u001b[0m\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.airplane to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.airplane` for environment variables or `env.get_wrapper_attr('airplane')` that will search the reminding wrappers.\u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:228: UserWarning: \u001b[33mWARN: Expects `terminated` signal to be a boolean, actual type: \u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:159: UserWarning: \u001b[33mWARN: The obs returned by the `step()` method is not within the observation space.\u001b[0m\n", + " logger.warn(f\"{pre} is not within the observation space.\")\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:246: UserWarning: \u001b[33mWARN: The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: \u001b[0m\n", + " logger.warn(\n", + "\u001b[32m2024-10-18 16:15:28.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 
16:15:28.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.929\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.938\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.948\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.981\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.060\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m380\u001b[0m - \u001b[1mTransition and reward function table generated.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 0\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 1.093000054359436 | Avg Error: 0.3490000069141388 | 1500<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:30.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.23999999463558197 | Avg Error: 0.09700000286102295 | 1311<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:31.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.05299999937415123 | Avg Error: 0.024000000208616257 | 1502<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.012000000104308128 | Avg Error: 0.004999999888241291 | 1973<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0010000000474974513 | 4176<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 150000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.446\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 1\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.5809999704360962 | Avg Error: 0.1340000033378601 | 1434<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:36.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.11299999803304672 | Avg Error: 0.028999999165534973 | 2117<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:37.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.023000000044703484 | Avg Error: 0.004999999888241291 | 3766<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:39.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.004999999888241291 | Avg Error: 0.0010000000474974513 | 5870<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:40.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 346\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 2\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.382\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 2.115000009536743 | Avg Error: 0.007000000216066837 | 9861<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:42.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0 | 9992<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 246\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 3\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 1.4259999990463257 | Avg Error: 0.004999999888241291 | 9885<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 212\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.317\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 4\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.7310000061988831 | Avg Error: 0.0020000000949949026 | 9898<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 116\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 5\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.11999999731779099 | Avg Error: 0.0 | 9946<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 18\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement 
finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 6\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 9996<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m401\u001b[0m - \u001b[1mPolicy and value function saved.\u001b[0m\n" + ] + } + ], + "source": [ + "import airplane\n", + "\n", + "glider = gym.make('ReducedSymmetricGliderPullout-v0')\n", + "\n", + "bins_space = {\n", + " \"flight_path_angle\": np.linspace(-np.pi-0.01, 0.5, 100, dtype=np.float32), # Flight Path Angle (γ) (0)\n", + " \"airspeed_norm\": np.linspace(0.7, 4.0, 100, dtype=np.float32), # Air Speed (V) (1)\n", + "}\n", + "\n", + "pi = PolicyIteration(\n", + " env=glider, \n", + " bins_space=bins_space,\n", + " action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32),\n", + " gamma=0.99,\n", + " theta=1e-3,\n", + ")\n", + "\n", + "pi.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a2f30c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Action: -0.4000000059604645 Reward: -174.1369079001539 Episode Length: 100\n", + "Action: -0.30000001192092896 Reward: -164.41724866465609 Episode Length: 100\n", + "Action: -0.20000000298023224 Reward: -154.62006619077087 Episode Length: 100\n", + "Action: -0.10000000149011612 Reward: -144.76020378697655 Episode Length: 100\n", + "Action: -5.551115123125783e-17 Reward: -134.8527057668743 Episode Length: 100\n", + "Action: 0.10000000149011612 Reward: -124.91275034308747 Episode Length: 100\n", + "Action: 0.20000000298023224 Reward: -114.95557843719415 Episode Length: 100\n", + "Action: 0.30000001192092896 Reward: -104.9964219683319 Episode Length: 100\n", + "Action: 0.4000000059604645 Reward: -95.05043663858261 Episode Length: 100\n", + "Action: 0.5 Reward: -85.13262608640899 Episode Length: 100\n", + "Action: 0.6000000238418579 Reward: -75.25777635358612 Episode Length: 100\n", + "Action: 0.699999988079071 Reward: -65.44040389458748 Episode Length: 100\n", + 
"Action: 0.800000011920929 Reward: -55.694660889340945 Episode Length: 100\n", + "Action: 0.8999999761581421 Reward: -46.034320424799766 Episode Length: 100\n", + "Action: 1.0 Reward: -36.47267481695943 Episode Length: 100\n" + ] + } + ], + "source": [ + "STALL_AIRSPEED = 27.331231856346\n", + "from utils.utils import get_optimal_action\n", + "\n", + "# rad to deg\n", + "rad_to_grad=np.pi /180\n", + "\n", + "for action in np.linspace(-0.4, 1.0, 15, dtype=np.float32):\n", + "\n", + " state = np.array([-0*rad_to_grad, 1.1])\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " \n", + " total_reward = 0\n", + " episode_length = 0\n", + "\n", + " while episode_length <100:\n", + " #action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, _, _, _ = glider.step(action)\n", + " state = state[0]\n", + " total_reward += reward*STALL_AIRSPEED\n", + " episode_length += 1\n", + " \n", + " print(f\"Action: {action} Reward: {total_reward} Episode Length: {episode_length}\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cd13daf", + "metadata": {}, + "outputs": [], + "source": [ + "STALL_AIRSPEED = 27.331231856346\n", + "from utils.utils import get_optimal_action\n", + "from tqdm import tqdm\n", + "with open(glider.__class__.__name__ + \".pkl\", \"rb\") as f:\n", + " pi: PolicyIteration = pickle.load(f)\n", + "\n", + "prom_episode_lenght = 0\n", + "dict_result = {}\n", + "dict_episode_length = {}\n", + "state_spaces = [v for v in pi.states_space if v[0] > -np.pi/2]\n", + "for state in tqdm(state_spaces):\n", + " initial_state = state.copy()\n", + " prev_state = state.copy()\n", + " glider.reset()\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " done = False\n", + " episode_length = 0\n", + " total_reward = 0\n", + " while not done:\n", + " action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, done, _, _ = glider.step(action)\n", + " done = bool(done)\n", + " if done:\n", + " break\n", + " state = state[0]\n", + " # check if our state is in the state space\n", + " index = pi.triangulation.find_simplex(state)\n", + " if not done:\n", + " total_reward -= reward#*STALL_AIRSPEED\n", + " if (index ==-1) or episode_length > 70: \n", + " done = True\n", + " episode_length += 1\n", + " \n", + " dict_result[tuple(initial_state)] = total_reward\n", + " dict_episode_length[tuple(initial_state)] = episode_length\n", + " prom_episode_lenght += episode_length / len(pi.states_space)\n", + " #print(f\"Initial state: {initial_state} - Total reward: {total_reward}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab055601", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "epsilon = 1*1e-1\n", + "#dict_result = dict_episode_length\n", + "# Your dictionary with keys as coordinates (x, y) and values as the color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "#convert x to grad\n", + "y = [coord[1] for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "#convert y to Vs\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 0.05 
>= e[0] >= -np.pi/2 - epsilon and e[1] > 0.7]\n", + "\n", + "# Create a filled contour plot\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.tricontourf(y, x, z, cmap=cmap, levels=18) # Change 'viridis' to any other colormap you like\n", + "plt.colorbar(label='', shrink=0.8, ) # Add color bar for the z values\n", + "\n", + "# Add labels and a title\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", + "\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "\n", + "# Set the minor ticks for the grid, keeping the dense grid with smaller squares\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True) # 60 minor ticks for the grid\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True) # 60 minor ticks for the grid\n", + "# vertical line at V/Vs = 1\n", + "plt.axvline(x=1, color='r', linestyle='--', linewidth=0.5)\n", + "# horizontal line at γ = 0\n", + "#plt.axhline(y=0, color='k', linestyle='--')\n", + "\n", + "# Major tick labels\n", + "vals = np.round(np.linspace(-90, 0, 6))\n", + "plt.yticks(vals) # 6 labels on the y-axis\n", + "plt.xticks(np.linspace(round(y_min), round(y_max), 6)) # 6 labels on the x-axis\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)\n", + "\n", + "# Save the figure without extra white margins\n", + "plt.savefig('output.png', bbox_inches='tight')\n", + "\n", + "# set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8416867", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy.interpolate import griddata\n", + "\n", + "# Your dictionary with keys as coordinates (x, y) and values as the color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 0.01 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "y = [coord[1] for coord in dict_result.keys() if 0.01 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 0.01 >= e[0] >= -np.pi/2 and e[1] > 0.7]\n", + "\n", + "# Create a grid for interpolation\n", + "x_grid = np.linspace(min(x), max(x), 100)\n", + "y_grid = np.linspace(min(y), max(y), 100)\n", + "X, Y = np.meshgrid(x_grid, y_grid)\n", + "\n", + "# Interpolate the scattered data into the grid\n", + "Z = griddata((y, x), z, (Y, X), method='linear')\n", + "\n", + "# Create the scatter plot\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.scatter(y, x, c=z, cmap=cmap)\n", + "\n", + "# Add color bar\n", + "plt.colorbar(label='', shrink=0.8)\n", + "contour_levels = np.linspace(np.min(z), np.max(z), 10) # 10 levels\n", + "contour_levels = contour_levels[contour_levels != 0] # Remove zero from levels\n", + "\n", + "# Add contour lines on top of the scatter plot\n", + "contour = plt.contour(Y, X, Z, levels=contour_levels, colors='black', linewidths=0.75)\n", + "plt.clabel(contour, inline=True, fontsize=8) # Optional: add labels to the contours\n", + "\n", + "# Add labels and a title\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", + "\n", + "# Plot vertical line at x=1\n",
"plt.axvline(x=1, color='k', linestyle='--')\n", + "\n", + "# Configure the minor ticks and grid\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True)\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True)\n", + "plt.yticks(np.linspace(x_min, x_max, 6)) # Major ticks on y-axis\n", + "plt.xticks(np.linspace(y_min, y_max, 4)) # Major ticks on x-axis\n", + "\n", + "# Set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Habilitar la cuadrícula en las marcas menores\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Save the image\n", + "plt.savefig('output_with_contours.png', bbox_inches='tight')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, { "cell_type": "markdown", "id": "3c8fe4a4", @@ -164,6 +545,16 @@ "\n", "test_enviroment(Continuous_MountainCarEnv(render_mode=\"human\"), pi)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "babe9f7f", + "metadata": {}, + "outputs": [], + "source": [ + "import symbo" + ] } ], "metadata": {