diff --git a/airplane/__init__.py b/airplane/__init__.py new file mode 100644 index 0000000..7438356 --- /dev/null +++ b/airplane/__init__.py @@ -0,0 +1,8 @@ +from gymnasium.envs.registration import register + +# Register the first custom environment +register( + id='ReducedSymmetricGliderPullout-v0', + entry_point='airplane.reduced_symmetric_glider_pullout:ReducedSymmetricGliderPullout', + max_episode_steps=100, +) \ No newline at end of file diff --git a/airplane/airplane_env.py b/airplane/airplane_env.py new file mode 100644 index 0000000..3781343 --- /dev/null +++ b/airplane/airplane_env.py @@ -0,0 +1,47 @@ +import numpy as np +from gymnasium import Env + +class AirplaneEnv(Env): + metadata = {"render_modes": ["human", "ascii", "ansi"], "render_fps": 60} + # TODO(gtorre): use render fps + + def __init__(self, airplane, render_mode=None): + self.airplane = airplane + + self.visualiser = None + self.render_mode = render_mode + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.window = None + self.clock = None + + def seed(self, seed=None): + np.random.seed(seed) + + def render(self, mode: str | None = "ascii"): + """Renders the environment. + :param mode: str, the mode to render with: + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. The text can include newlines + and ANSI escape sequences (e.g. for colors). + """ + if mode == "human": + pass + #if not self.visualiser: + #self.visualiser = Visualizer(self.airplane) + #self.visualiser.plot() + + else: # ANSI or ASCII + # TODO: Add additional observations + print( + f"\u001b[34m Flight Path Angle (deg): {np.rad2deg(self.airplane.flight_path_angle):.2f}\u001b[37m" + ) + # TODO: Proper stall prediction + if self.airplane.flight_path_angle > 0.7: + print("\u001b[35m -- STALL --\u001b[37m") + + def close(self): + if self.window is not None: + # close + raise NotImplementedError \ No newline at end of file diff --git a/airplane/grumman.py b/airplane/grumman.py new file mode 100644 index 0000000..e451825 --- /dev/null +++ b/airplane/grumman.py @@ -0,0 +1,131 @@ +import numpy as np + + +class Grumman: + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Base class for airplane parameters""" + + def __init__(self): + ###################### + ### Sim parameters ### + ###################### + self.TIME_STEP = 0.01 + self.GRAVITY = 9.81 + self.AIR_DENSITY = 1.225 # Density (ρ) [kg/m3] + + ########################### + ### Airplane parameters ### + ########################### + # Aerodynamic model: CL coefficients + self.CL_0 = 0.41 + self.CL_ALPHA = 4.6983 + self.CL_ELEVATOR = 0.361 + self.CL_QHAT = 2.42 + # Aerodynamic model: CD coefficients + self.CD_0 = 0.0525 + self.CD_ALPHA = 0.2068 + self.CD_ALPHA2 = 1.8712 + # Aerodynamic model: Cm coefficients + self.CM_0 = 0.076 + self.CM_ALPHA = -0.8938 + self.CM_ELEVATOR = -1.0313 + self.CM_QHAT = -7.15 + # Aerodynamic model: Cl coefficients + self.Cl_BETA = -0.1089 + self.Cl_PHAT = -0.52 + self.Cl_RHAT = 0.19 + self.Cl_AILERON = -0.1031 + self.Cl_RUDDER = 0.0143 + # Physical model + self.MASS = 697.18 # Mass (m) [kg] + self.WING_SURFACE_AREA = 9.1147 # Wing surface area (S) [m2] + self.CHORD = 1.22 # Chord (c) [m] + self.WING_SPAN = 7.46 # Wing Span (b) [m] + self.I_XX = 808.06 # Inertia [Kg.m^2] + self.I_YY = 1011.43 # Inertia [Kg.m^2] + 
self.ALPHA_STALL = np.deg2rad(15) # Stall angle of attack (αs) [rad] + self.ALPHA_NEGATIVE_STALL = np.deg2rad(-7) # Negative stall angle of attack (αs) [rad] + self.CL_STALL = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + self.CL_REF = self.CL_STALL + # self.STALL_AIRSPEED = 32.19 # Stall air speed (Vs) [m/s] + self.STALL_AIRSPEED = np.sqrt(self.MASS * self.GRAVITY / (0.5 * self.AIR_DENSITY * \ + self.WING_SURFACE_AREA * self.CL_REF)) # Stall air speed (Vs) [m/s] + self.MAX_CRUISE_AIRSPEED = 2 * self.STALL_AIRSPEED # Maximum air speed (Vs) [m/s] + + # Throttle model + self.THROTTLE_LINEAR_MAPPING = None + self._initialize_throttle_model() + + def _update_state_from_derivative(self, value_to_update, value_derivative): + value_to_update += self.TIME_STEP * value_derivative + return value_to_update + + def _alpha_from_cl(self, c_lift): + alpha = (c_lift - self.CL_0) / self.CL_ALPHA + return alpha + + def _cl_from_lift_force_and_speed(self, lift_force, airspeed): + cl = 2 * lift_force / (self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2) + return cl + + def _cl_from_alpha(self, alpha, elevator, q_hat): + # TODO: review model + if alpha <= self.ALPHA_NEGATIVE_STALL: + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_NEGATIVE_STALL + elif alpha >= self.ALPHA_STALL: + # Stall model: Lift saturation + c_lift = self.CL_0 + self.CL_ALPHA * self.ALPHA_STALL + # Stall model: Lift reduction with opposite slope + # c_lift = - self.CL_ALPHA * alpha + self.CL_0 + 2 * self.CL_ALPHA * self.ALPHA_STALL + else: + c_lift = self.CL_0 + self.CL_ALPHA * alpha + self.CL_ELEVATOR * elevator + self.CL_QHAT * q_hat + return c_lift + + def _lift_force_at_speed_and_cl(self, airspeed, lift_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * lift_coefficient + + def _cd_from_alpha(self, alpha): + c_drag = self.CD_0 + self.CD_ALPHA * alpha + self.CD_ALPHA2 * (alpha ** 2) + return c_drag + + def _cd_from_cl(self, c_lift): + c_drag = self._cd_from_alpha(self._alpha_from_cl(c_lift)) + return c_drag + + def _drag_force_at_speed_and_cd(self, airspeed, drag_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * airspeed ** 2 * drag_coefficient + + def _drag_force_at_cruise_speed(self, airspeed): + cruise_lift_force = self.MASS * self.GRAVITY + cruise_cl = self._cl_from_lift_force_and_speed(cruise_lift_force, airspeed) + alpha = self._alpha_from_cl(cruise_cl) + cruise_cd = self._cd_from_alpha(alpha) + drag_force = self._drag_force_at_speed_and_cd(airspeed, cruise_cd) + return drag_force + + def _rolling_moment_coefficient(self, beta, p_hat, r_hat, aileron, rudder): + c_rolling_moment = self.Cl_BETA * beta + self.Cl_PHAT * p_hat + self.Cl_RHAT * r_hat + \ + self.Cl_AILERON * aileron + self.Cl_RUDDER * rudder + return c_rolling_moment + + def _rolling_moment_at_speed_and_cl(self, airspeed, rolling_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.WING_SPAN * airspeed ** 2 * rolling_moment_coefficient + + def _pitching_moment_coefficient(self, alpha, elevator, q_hat): + c_pitch_moment = self.CM_0 + self.CM_ALPHA * alpha + self.CM_ELEVATOR * elevator + self.CM_QHAT * q_hat + return c_pitch_moment + + def _pitching_moment_at_speed_and_cm(self, airspeed, pitching_moment_coefficient): + return 0.5 * self.AIR_DENSITY * self.WING_SURFACE_AREA * self.CHORD * airspeed ** 2 * pitching_moment_coefficient + + def _initialize_throttle_model(self, ): + # Throttle model: Thrust force = Kt * δ_throttle + # Max Thrust -> Kt * 1 = Drag(V=Vmax) -> Kt = 
0.5 ρ S (Vmax)^2 CD + # δ_throttle = 1.0 -> Max Cruise speed: V' = Vmax -> V_dot = 0 = Thrust Force - Drag Force + self.THROTTLE_LINEAR_MAPPING = self._drag_force_at_cruise_speed(self.MAX_CRUISE_AIRSPEED) + + def _thrust_force_at_throttle(self, throttle): + thrust_force = self.THROTTLE_LINEAR_MAPPING * throttle + return thrust_force \ No newline at end of file diff --git a/airplane/reduced_banked_glider_pullout.py b/airplane/reduced_banked_glider_pullout.py new file mode 100644 index 0000000..42e7718 --- /dev/null +++ b/airplane/reduced_banked_glider_pullout.py @@ -0,0 +1,57 @@ +import numpy as np +import gymnasium +from gymnasium import spaces +from matplotlib import pyplot as plt +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + + +class ReducedBankedGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + # Observation space: Flight Path Angle (γ), Air Speed (V), Bank Angle (μ) + self.observation_space = spaces.Box(np.array([-np.pi, 0.9, np.deg2rad(-20)], np.float32), np.array([0, 4.0, np.deg2rad(200)], np.float32), shape=(3,), dtype=np.float32) + # Action space: Lift Coefficient (CL), Bank Rate (μ') + self.action_space = spaces.Box(np.array([-0.5, np.deg2rad(-30)], np.float32), np.array([1.0, np.deg2rad(30)], np.float32), shape=(2,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm, self.airplane.bank_angle], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm, bank_angle] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, airspeed_norm, bank_angle) + + observation = self._get_obs(), {} + + return observation + + def step(self, action: list): + # Update state + action = np.clip(action, self.action_space.low, self.action_space.high) + c_lift = action[0] + bank_rate = action[1] + + self.airplane.command_airplane(c_lift, bank_rate, 0) + + # Calculate step reward: Height Loss + # TODO: Analyze policy performance based on reward implementation. + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle) + # reward = self.TIME_STEP * (self.airspeed_norm * self.STALL_AIRSPEED) * np.sin(self.flight_path_angle) - 0.01 * bank_rate ** 2 + terminated = self.termination() + observation = self._get_obs() + info = self._get_info() + return observation, reward, terminated, False, info + + + def termination(self,): + terminate = np.where((self.airplane.flight_path_angle >= 0.0) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate \ No newline at end of file diff --git a/airplane/reduced_grumman.py b/airplane/reduced_grumman.py new file mode 100644 index 0000000..365efdc --- /dev/null +++ b/airplane/reduced_grumman.py @@ -0,0 +1,56 @@ +import numpy as np +from airplane.grumman import Grumman + +class ReducedGrumman(Grumman): + #################################### + ### Grumman American AA-1 Yankee ### + #################################### + """Class for simplified airplane state and dynamics""" + + # NOTE: Commands as separate objects? e.g. bank.rotate(airplane), + # throttle.accelerate(airplane), etc. + # NOTE: Use of α instead of Cl?
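# A minimal, self-contained sketch of the reduced point-mass dynamics that
# ReducedGrumman.command_airplane integrates below with a forward-Euler step,
# using the Grumman constants from grumman.py. The helper name `euler_step`,
# its signature, and the example call are illustrative assumptions, not project
# code. The stall speed Vs = sqrt(m g / (0.5 ρ S CL_ref)) evaluates to ≈ 27.33 m/s
# with these constants, which is the same value used as STALL_AIRSPEED later on.
import numpy as np

RHO, S, M, G, DT = 1.225, 9.1147, 697.18, 9.81, 0.01  # air density, wing area, mass, gravity, TIME_STEP
VS = 27.33                                            # stall airspeed [m/s] from the formula above

def euler_step(gamma, v_norm, mu, c_lift, c_drag, thrust=0.0):
    """One Euler step of V' = -g sin(γ) - ρ S V² CD / (2 m) + T / m and
    γ' = ρ S V CL cos(µ) / (2 m) - (g / V) cos(γ), with V = v_norm * VS."""
    v = v_norm * VS
    v_dot = -G * np.sin(gamma) - 0.5 * RHO * S * v ** 2 * c_drag / M + thrust / M
    gamma_dot = 0.5 * RHO * S * v * c_lift * np.cos(mu) / M - (G / v) * np.cos(gamma)
    return gamma + DT * gamma_dot, (v + DT * v_dot) / VS

# e.g. a wings-level pullout step from γ = -45°, V = 1.5 Vs with CL = 1.0, CD ≈ 0.11:
# euler_step(np.deg2rad(-45), 1.5, 0.0, 1.0, 0.11)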
+ + def __init__(self): + super().__init__() + ########################## + ### Airplane variables ### + ########################## + self.flight_path_angle = np.zeros(10000, dtype=np.float32) # Flight Path Angle (γ) [rad] + self.airspeed_norm = np.ones_like(self.flight_path_angle, dtype=np.float32) # Air Speed (V/Vs) [1] + self.bank_angle = 0.0 # Bank Angle (μ) [rad] + # previous commands + self.last_c_lift = 0.0 + self.last_bank_rate = 0.0 + self.last_throttle = 0.0 + + def command_airplane(self, c_lift, bank_rate, delta_throttle): + self.last_c_lift = c_lift + self.last_bank_rate = bank_rate + self.last_throttle = delta_throttle + + c_drag = self._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.GRAVITY * np.sin(self.flight_path_angle) - 0.5 * self.AIR_DENSITY * ( + self.WING_SURFACE_AREA / self.MASS) * (self.airspeed_norm * self.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.THROTTLE_LINEAR_MAPPING * delta_throttle / self.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.AIR_DENSITY * (self.WING_SURFACE_AREA / self.MASS) * ( + self.airspeed_norm * self.STALL_AIRSPEED) * c_lift * np.cos(self.bank_angle) \ + - (self.GRAVITY / (self.airspeed_norm * self.STALL_AIRSPEED)) * np.cos( + self.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + + self.airspeed_norm = self._update_state_from_derivative(self.airspeed_norm, airspeed_dot / self.STALL_AIRSPEED) + self.flight_path_angle = self._update_state_from_derivative(self.flight_path_angle, flight_path_angle_dot) + self.bank_angle = self._update_state_from_derivative(self.bank_angle, bank_angle_dot) + + def reset(self, flight_path_angle, airspeed_norm, bank_angle): + self.flight_path_angle = flight_path_angle + self.airspeed_norm = airspeed_norm + self.bank_angle = bank_angle \ No newline at end of file diff --git a/airplane/reduced_symmetric_glider_pullout.py b/airplane/reduced_symmetric_glider_pullout.py new file mode 100644 index 0000000..b189229 --- /dev/null +++ b/airplane/reduced_symmetric_glider_pullout.py @@ -0,0 +1,92 @@ +import numpy as np +from gymnasium import spaces + +from airplane.reduced_grumman import ReducedGrumman +from airplane.airplane_env import AirplaneEnv + +try: + import cupy as xp + if not xp.cuda.is_available(): + raise ImportError("CUDA is not available. 
Falling back to NumPy.") +except (ImportError, AttributeError): + xp = np + +class ReducedSymmetricGliderPullout(AirplaneEnv): + + def __init__(self, render_mode=None): + self.airplane = ReducedGrumman() + super().__init__(self.airplane) + + # Observation space: Flight Path Angle (γ), Air Speed (V) + self.observation_space = spaces.Box(np.array([-np.pi, 0.6], np.float32), + np.array([0, 4.0], np.float32), shape=(2,), dtype=np.float32) + # Action space: Lift Coefficient + self.action_space = spaces.Box(-0.5, 1.0, shape=(1,), dtype=np.float32) + + def _get_obs(self): + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T + + def _get_info(self): + return {} + + def reset(self, seed=None, options=None): + + # Choose the initial agent's state uniformly + [flight_path_angle, airspeed_norm] = np.random.uniform(self.observation_space.low, self.observation_space.high) + self.airplane.reset(flight_path_angle, airspeed_norm, 0) + + observation = self._get_obs() + # clip the observation to the observation space + observation = np.clip(observation, self.observation_space.low, self.observation_space.high).flatten() + assert self.observation_space.contains(observation), "Observation is not within the observation space!" + return observation, {} + + def step(self, action: list): + # Update state + c_lift = action #action[0] + #self.airplane.command_airplane(c_lift, 0, 0) + + delta_throttle = 0 + bank_rate = 0 + + init_terminal = self.termination() + + self.airplane.last_c_lift = c_lift + self.airplane.last_bank_rate = bank_rate + self.airplane.last_throttle = delta_throttle + + c_drag = self.airplane._cd_from_cl(c_lift) + + # V_dot = - g sin γ - 0.5 * (ρ S V^2 CD / m) + (thrust force / m) + airspeed_dot = - self.airplane.GRAVITY * np.sin(self.airplane.flight_path_angle) - 0.5 * self.airplane.AIR_DENSITY * ( + self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) ** 2 * c_drag \ + + (self.airplane.THROTTLE_LINEAR_MAPPING * delta_throttle / self.airplane.MASS) + + # γ_dot = 0.5 * (ρ S V CL cos µ / m) - g cos γ / V + flight_path_angle_dot = 0.5 * self.airplane.AIR_DENSITY * (self.airplane.WING_SURFACE_AREA / self.airplane.MASS) * ( + self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED) * c_lift * np.cos(self.airplane.bank_angle) \ + - (self.airplane.GRAVITY / (self.airplane.airspeed_norm * self.airplane.STALL_AIRSPEED)) * np.cos( + self.airplane.flight_path_angle) + + # μ_dot = μ_dot_commanded + bank_angle_dot = bank_rate + + self.airplane.airspeed_norm += self.airplane.TIME_STEP * (airspeed_dot / self.airplane.STALL_AIRSPEED) + self.airplane.flight_path_angle += self.airplane.TIME_STEP * flight_path_angle_dot + #clip the state to the observation space + self.airplane.airspeed_norm = np.clip(self.airplane.airspeed_norm, self.observation_space.low[1], self.observation_space.high[1]) + self.airplane.flight_path_angle = np.clip(self.airplane.flight_path_angle, self.observation_space.low[0], self.observation_space.high[0]) + # Calculate step reward: Height Loss + reward = self.airplane.TIME_STEP * self.airplane.airspeed_norm * np.sin(self.airplane.flight_path_angle)*27.331231856346 + + # Get the next state + info = self._get_info() + terminated = self.termination() | init_terminal + reward = np.where(init_terminal, 0, reward) + + return np.vstack([self.airplane.flight_path_angle, self.airplane.airspeed_norm], dtype=np.float32).T, reward, terminated, False, info + + + def termination(self,): + 
terminate = np.where((self.airplane.flight_path_angle >= 0.0) & (self.airplane.airspeed_norm >= 1) , True, False) + return terminate diff --git a/src/PolicyIteration.py b/src/PolicyIteration.py index a9dc9f1..86bd127 100644 --- a/src/PolicyIteration.py +++ b/src/PolicyIteration.py @@ -1,12 +1,11 @@ import os import pickle +import numpy as np import gymnasium as gym from loguru import logger from scipy.spatial import Delaunay from utils.utils import plot_3D_value_function - -import numpy as np try: import cupy as cp @@ -15,8 +14,7 @@ logger.info("CUDA driver is available.") except (ImportError, AttributeError): - - import numpy as cp + cp = np logger.warning("CUDA is not available. Falling back to NumPy.") def asarray(arr, *args, **kwargs): """In NumPy, this just ensures the object is a NumPy array, with support for additional arguments.""" @@ -28,7 +26,7 @@ def asnumpy(arr, *args, **kwargs): np.asarray = asarray np.asnumpy = asnumpy - cp = np + class PolicyIteration(object): @@ -124,6 +122,12 @@ def __init__(self, env: gym.Env, self.grid = np.meshgrid(*self.bins_space.values(), indexing='ij') # Flatten and stack to create a list of points in the space self.states_space = np.vstack([g.ravel() for g in self.grid], dtype=np.float32).T + + # get x and y coordinates + x = self.states_space[:,0] + y = self.states_space[:,1] + self.terminal_states = np.where((x >= 0.0) & (y >= 1) , True, False) + self.terminal_reward = 0 # self.num_simplex_points:int = int(self.states_space[0].shape[0] + 1) # number of points in a simplex one more than the dimension self.space_dim:int = int(self.states_space[0].shape[0]) @@ -160,6 +164,7 @@ def __in_cell__(self, obs: cp.ndarray) -> cp.ndarray: Returns: np.ndarray: A boolean array indicating whether each observation is within the valid state bounds. """ + #return cp.all((obs >= self.cell_lower_bounds[:, None]) & (obs <= self.cell_upper_bounds[:, None]), axis=1) return cp.all((obs >= self.cell_lower_bounds) & (obs <= self.cell_upper_bounds), axis=1) def barycentric_coordinates(self, points:np.ndarray)->tuple: @@ -224,14 +229,22 @@ def calculate_transition_reward_table(self): "points_indexes": The indexes of the points in the simplex. """ for j, action in enumerate(self.action_space): - self.env.state = cp.asarray(self.states_space, dtype=cp.float32) - obs_gpu, reward_gpu, _, _, _ = self.env.step(action) + self.env.reset() + #self.env.state = cp.asarray(self.states_space, dtype=cp.float32) + state = np.array(self.states_space, dtype=np.float32) + self.env.airplane.flight_path_angle = state[:,0].copy() + self.env.airplane.airspeed_norm = state[:,1].copy() +# self.env.airplane.bank_angle = state[:,2].copy() + + obs_gpu, reward_gpu, terminated, _, _ = self.env.step(action) + # log if any state is outside the bounds of the environment states_outside_gpu = self.__in_cell__(obs_gpu) if bool(cp.any(~states_outside_gpu)): # get the indexes of the states outside the bounds - reward_gpu = cp.where(states_outside_gpu, reward_gpu, -100) logger.warning(f"Some states are outside the bounds of the environment.") + # if the state is terminal, set the reward to zero + reward_gpu = cp.where(terminated, 0, reward_gpu) # if any state is outside the bounds of the environment clip it to the bounds obs_gpu = cp.clip(obs_gpu, self.cell_lower_bounds, self.cell_upper_bounds) # get the barycentric coordinates of the resulting state in CPU for now. 
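# A minimal sketch of the simplex interpolation these hunks rely on (barycentric_coordinates /
# get_value): each next state is located in the Delaunay triangulation, its barycentric weights
# λ are recovered from the simplex's affine transform, and the interpolated value is Σ λ_i V(v_i)
# ('Variable Resolution Discretization in Optimal Control', eq. 5). The function below is an
# illustrative NumPy/SciPy stand-in under those assumptions, not the class's actual method.
import numpy as np
from scipy.spatial import Delaunay

def interpolate_value(tri: Delaunay, values: np.ndarray, points: np.ndarray) -> np.ndarray:
    ndim = points.shape[1]
    simplex = tri.find_simplex(points)          # containing simplex per point (-1 if outside the hull,
                                                # which is why observations are clipped to the cell bounds first)
    T = tri.transform[simplex, :ndim]           # inverse affine map of each simplex
    r = points - tri.transform[simplex, ndim]   # offset from the simplex's reference vertex
    bary = np.einsum('ijk,ik->ij', T, r)        # first ndim barycentric coordinates
    lambdas = np.c_[bary, 1.0 - bary.sum(axis=1)]   # weights sum to one
    vertices = tri.simplices[simplex]           # state-space indexes of the simplex vertices
    return np.einsum('ij,ij->i', lambdas, values[vertices])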
@@ -244,6 +257,7 @@ def calculate_transition_reward_table(self): self.lambdas[:,j] = cp.asarray(lambdas, dtype=cp.float32) self.simplexes[:,j] = cp.asarray(simplexes, dtype=cp.float32) self.points_indexes[:,j] = cp.asarray(points_indexes, dtype=cp.int32) + def get_value(self, lambdas:cp.ndarray, point_indexes:cp.ndarray, value_function:cp.ndarray)->cp.ndarray: @@ -280,12 +294,17 @@ def policy_evaluation(self): logger.info("Starting policy evaluation") while cp.abs(float(max_error)) > self.theta: # initialize the new value function to zeros - new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function = cp.zeros_like(self.value_function, dtype=cp.float32) + vf_next_state = cp.zeros_like(self.value_function, dtype=cp.float32) new_val = cp.zeros_like(self.value_function, dtype=cp.float32) + new_value_function[self.terminal_states] = self.terminal_reward + new_val[self.terminal_states] = self.terminal_reward + for j, _ in enumerate(self.action_space): # Checkout 'Variable Resolution Discretization in Optimal Control, eq 5' - next_state_value = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function) - new_val += self.policy[:,j] * (self.reward[:,j] + self.gamma * next_state_value) + vf_next_state[~self.terminal_states] = self.get_value(self.lambdas[:, j], self.points_indexes[:, j], self.value_function)[~self.terminal_states] + new_val[~self.terminal_states] += self.policy[~self.terminal_states,j] * (self.reward[~self.terminal_states,j] + self.gamma * vf_next_state[~self.terminal_states]) + new_value_function = new_val # update the error: the maximum difference between the new and old value functions errors = cp.fabs(new_value_function[:] - self.value_function[:]) diff --git a/src/utils/utils.py b/src/utils/utils.py index a378ed9..357383d 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -11,18 +11,24 @@ def plot_3D_value_function(vf: np.array, """ Plots a 3D value function in some color scale.""" # Assuming points is a 2D array where each row is a point [position, velocity] - X = points[:, 0] # x-axis (position) - Y = points[:, 1] # y-axis (velocity) + X = points[:, 0] # x-axis + # transformb X from rad to deg + X = np.rad2deg(X) + Y = points[:, 1] # y-axis vf = vf # z-axis (value function) vf_to_plot = (vf - vf.min()) / (vf.max() - vf.min()) if normalize else vf fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Use plot_trisurf for unstructured triangular surface plot surf = ax.plot_trisurf(X, Y, vf_to_plot, cmap=cmap, edgecolor='white', linewidth=0.2) + # Add title + ax.set_title('Reduced Symmetric Glider Value Function ', pad=20) # Add labels - ax.set_xlabel('position') - ax.set_ylabel('velocity') - ax.set_zlabel('Normalized Value Function') + ax.set_xlabel('Flight Path Angle (γ) [deg]', labelpad=10) + ax.set_ylabel('V/Vs', labelpad=10) + ax.set_zlabel('Normalized Value Function', labelpad=10) + ax.set_xticks(np.round(np.linspace(min(X), max(X), 5))) # 5 ticks on the x-axis + ax.set_yticks(np.round(np.linspace(min(Y), max(Y), 5))) # 5 ticks on the y-axis # Add color bar to represent the value range if path is not None: plt.savefig(path) # Show the plot @@ -70,7 +76,7 @@ def get_optimal_action(state:np.array, optimal_policy:np.array): Returns: action: The optimal action for the given state. 
""" - lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy, state) + lambdas, simmplex_info = get_barycentric_coordinates(optimal_policy,state) simplex, points_indexes = simmplex_info actions = optimal_policy.action_space probabilities = np.zeros(len(actions), dtype=np.float32) diff --git a/test.py b/test.py index 8a5dd1a..8aa5879 100644 --- a/test.py +++ b/test.py @@ -1,30 +1,33 @@ import pickle +import airplane import numpy as np +import gymnasium as gym from utils.utils import plot_3D_value_function from PolicyIteration import PolicyIteration -from classic_control.continuous_mountain_car import Continuous_MountainCarEnv -env=Continuous_MountainCarEnv() +glider = gym.make('ReducedSymmetricGliderPullout-v0') bins_space = { - "x_space": np.linspace(env.min_position, env.max_position, 100, dtype=np.float32), # position space (0) - "x_dot_space": np.linspace(-abs(env.max_speed), abs(env.max_speed), 100, dtype=np.float32), # velocity space (1) + "flight_path_angle": np.linspace(-np.pi-0.01, 0.10, 100, dtype=np.float32), # Flight Path Angle (γ) (0) + "airspeed_norm": np.linspace(0.7, 4, 100, dtype=np.float32), # Air Speed (V) (1) } pi = PolicyIteration( - env=env, + env=glider, bins_space=bins_space, - action_space=np.linspace(-1.0, +1.0,9, dtype=np.float32), + action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32), gamma=0.99, theta=1e-3, ) -pi.run() -with open(env.__class__.__name__ + ".pkl", "rb") as f: + +#pi.run() + +with open(glider.__class__.__name__ + ".pkl", "rb") as f: pi: PolicyIteration = pickle.load(f) plot_3D_value_function(vf = pi.value_function, points = pi.states_space, - normalize=True, + normalize=False, show=True, path="./test_vf.png") \ No newline at end of file diff --git a/testing.ipynb b/testing.ipynb index b5a7f7d..35accb4 100644 --- a/testing.ipynb +++ b/testing.ipynb @@ -13,17 +13,398 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "002626e9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-18 16:15:26.697\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[33m\u001b[1mCUDA is not available. Falling back to NumPy.\u001b[0m\n" + ] + } + ], "source": [ "import pickle\n", "import numpy as np\n", + "import gymnasium as gym\n", "from utils.utils import test_enviroment\n", "from PolicyIteration import PolicyIteration" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "95b5a85a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-18 16:15:28.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m119\u001b[0m - \u001b[1mLower bounds: [-3.1515927 0.7 ]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mUpper bounds: [0.5 4. 
]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mThe action space is: [-4.000000e-01 -3.000000e-01 -2.000000e-01 -1.000000e-01 -5.551115e-17\n", + " 1.000000e-01 2.000000e-01 3.000000e-01 4.000000e-01 5.000000e-01\n", + " 6.000000e-01 7.000000e-01 8.000000e-01 9.000000e-01 1.000000e+00]\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m139\u001b[0m - \u001b[1mNumber of states: 10000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mTotal states:150000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mPolicy Iteration was correctly initialized.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m154\u001b[0m - \u001b[1mThe enviroment name is: TimeLimit\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m366\u001b[0m - \u001b[1mCreating Delaunay triangulation over the state space...\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m368\u001b[0m - \u001b[1mDelaunay triangulation created.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m378\u001b[0m - \u001b[1mGenerating transition and reward function table...\u001b[0m\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.airplane to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.airplane` for environment variables or `env.get_wrapper_attr('airplane')` that will search the reminding wrappers.\u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:228: UserWarning: \u001b[33mWARN: Expects `terminated` signal to be a boolean, actual type: \u001b[0m\n", + " logger.warn(\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:159: UserWarning: \u001b[33mWARN: The obs returned by the `step()` method is not within the observation space.\u001b[0m\n", + " logger.warn(f\"{pre} is not within the observation space.\")\n", + "/home/nromero/anaconda3/envs/DynamicProgramming/lib/python3.11/site-packages/gymnasium/utils/passive_env_checker.py:246: UserWarning: \u001b[33mWARN: The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: \u001b[0m\n", + " logger.warn(\n", + "\u001b[32m2024-10-18 16:15:28.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 
16:15:28.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.929\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.938\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.948\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.981\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:28.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.060\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mcalculate_transition_reward_table\u001b[0m:\u001b[36m243\u001b[0m - \u001b[33m\u001b[1mSome states are outside the bounds of the environment.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m380\u001b[0m - \u001b[1mTransition and reward function table generated.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 0\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:29.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 1.093000054359436 | Avg Error: 0.3490000069141388 | 1500<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:30.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.23999999463558197 | Avg Error: 0.09700000286102295 | 1311<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:31.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.05299999937415123 | Avg Error: 0.024000000208616257 | 1502<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.012000000104308128 | Avg Error: 0.004999999888241291 | 1973<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0010000000474974513 | 4176<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 150000\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.446\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 1\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:35.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.5809999704360962 | Avg Error: 0.1340000033378601 | 1434<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:36.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.11299999803304672 | Avg Error: 0.028999999165534973 | 2117<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:37.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.023000000044703484 | Avg Error: 0.004999999888241291 | 3766<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:39.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.004999999888241291 | Avg Error: 0.0010000000474974513 | 5870<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:40.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 346\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 2\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:41.382\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 2.115000009536743 | Avg Error: 0.007000000216066837 | 9861<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:42.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0020000000949949026 | Avg Error: 0.0 | 9992<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 246\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 3\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:43.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 1.4259999990463257 | Avg Error: 0.004999999888241291 | 9885<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 212\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.317\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 4\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:44.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.7310000061988831 | Avg Error: 0.0020000000949949026 | 9898<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 116\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 5\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:45.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.11999999731779099 | Avg Error: 0.0 | 9946<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m354\u001b[0m - \u001b[1mThe number of updated different actions: 18\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement 
finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mrun\u001b[0m:\u001b[36m382\u001b[0m - \u001b[1msolving step 6\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mStarting policy evaluation\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:46.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0010000000474974513 | Avg Error: 0.0 | 9996<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mMax Error: 0.0 | Avg Error: 0.0 | 10000<0.001\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_evaluation\u001b[0m:\u001b[36m332\u001b[0m - \u001b[1mPolicy evaluation finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m341\u001b[0m - \u001b[1mStarting policy improvement\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36mpolicy_improvement\u001b[0m:\u001b[36m357\u001b[0m - \u001b[1mPolicy improvement finished.\u001b[0m\n", + "\u001b[32m2024-10-18 16:15:47.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mPolicyIteration\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m401\u001b[0m - \u001b[1mPolicy and value function saved.\u001b[0m\n" + ] + } + ], + "source": [ + "import airplane\n", + "\n", + "glider = gym.make('ReducedSymmetricGliderPullout-v0')\n", + "\n", + "bins_space = {\n", + " \"flight_path_angle\": np.linspace(-np.pi-0.01, 0.5, 100, dtype=np.float32), # Flight Path Angle (γ) (0)\n", + " \"airspeed_norm\": np.linspace(0.7, 4.0, 100, dtype=np.float32), # Air Speed (V) (1)\n", + "}\n", + "\n", + "pi = PolicyIteration(\n", + " env=glider, \n", + " bins_space=bins_space,\n", + " action_space=np.linspace(-0.4, 1.0, 15, dtype=np.float32),\n", + " gamma=0.99,\n", + " theta=1e-3,\n", + ")\n", + "\n", + "pi.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a2f30c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Action: -0.4000000059604645 Reward: -174.1369079001539 Episode Length: 100\n", + "Action: -0.30000001192092896 Reward: -164.41724866465609 Episode Length: 100\n", + "Action: -0.20000000298023224 Reward: -154.62006619077087 Episode Length: 100\n", + "Action: -0.10000000149011612 Reward: -144.76020378697655 Episode Length: 100\n", + "Action: -5.551115123125783e-17 Reward: -134.8527057668743 Episode Length: 100\n", + "Action: 0.10000000149011612 Reward: -124.91275034308747 Episode Length: 100\n", + "Action: 0.20000000298023224 Reward: -114.95557843719415 Episode Length: 100\n", + "Action: 0.30000001192092896 Reward: -104.9964219683319 Episode Length: 100\n", + "Action: 0.4000000059604645 Reward: -95.05043663858261 Episode Length: 100\n", + "Action: 0.5 Reward: -85.13262608640899 Episode Length: 100\n", + "Action: 0.6000000238418579 Reward: -75.25777635358612 Episode Length: 100\n", + "Action: 0.699999988079071 Reward: -65.44040389458748 Episode Length: 100\n", + 
"Action: 0.800000011920929 Reward: -55.694660889340945 Episode Length: 100\n", + "Action: 0.8999999761581421 Reward: -46.034320424799766 Episode Length: 100\n", + "Action: 1.0 Reward: -36.47267481695943 Episode Length: 100\n" + ] + } + ], + "source": [ + "STALL_AIRSPEED = 27.331231856346\n", + "from utils.utils import get_optimal_action\n", + "\n", + "# rad to deg\n", + "rad_to_grad=np.pi /180\n", + "\n", + "for action in np.linspace(-0.4, 1.0, 15, dtype=np.float32):\n", + "\n", + " state = np.array([-0*rad_to_grad, 1.1])\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " \n", + " total_reward = 0\n", + " episode_length = 0\n", + "\n", + " while episode_length <100:\n", + " #action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, _, _, _ = glider.step(action)\n", + " state = state[0]\n", + " total_reward += reward*STALL_AIRSPEED\n", + " episode_length += 1\n", + " \n", + " print(f\"Action: {action} Reward: {total_reward} Episode Length: {episode_length}\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cd13daf", + "metadata": {}, + "outputs": [], + "source": [ + "STALL_AIRSPEED = 27.331231856346\n", + "from utils.utils import get_optimal_action\n", + "from tqdm import tqdm\n", + "with open(glider.__class__.__name__ + \".pkl\", \"rb\") as f:\n", + " pi: PolicyIteration = pickle.load(f)\n", + "\n", + "prom_episode_lenght = 0\n", + "dict_result = {}\n", + "dict_episode_length = {}\n", + "state_spaces = [v for v in pi.states_space if v[0] > -np.pi/2]\n", + "for state in tqdm(state_spaces):\n", + " initial_state = state.copy()\n", + " prev_state = state.copy()\n", + " glider.reset()\n", + " glider.airplane.flight_path_angle = state[0]\n", + " glider.airplane.airspeed_norm = state[1]\n", + " done = False\n", + " episode_length = 0\n", + " total_reward = 0\n", + " while not done:\n", + " action = get_optimal_action(state, pi)\n", + " prev_state = state.copy()\n", + " state, reward, done, _, _ = glider.step(action)\n", + " done = bool(done)\n", + " if done:\n", + " break\n", + " state = state[0]\n", + " # check if our state is in the state space\n", + " index = pi.triangulation.find_simplex(state)\n", + " if not done:\n", + " total_reward -= reward#*STALL_AIRSPEED\n", + " if (index ==-1) or episode_length > 70: \n", + " done = True\n", + " episode_length += 1\n", + " \n", + " dict_result[tuple(initial_state)] = total_reward\n", + " dict_episode_length[tuple(initial_state)] = episode_length\n", + " prom_episode_lenght += episode_length / len(pi.states_space)\n", + " #print(f\"Initial state: {initial_state} - Total reward: {total_reward}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab055601", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "epsilon = 1*1e-1\n", + "#dict_result = dict_episode_length\n", + "# Your dictionary with keys as coordinates (x, y) and values as the color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "#convert x to grad\n", + "y = [coord[1] for coord in dict_result.keys() if 0.05 >= coord[0] >= -np.pi/2 - epsilon and coord[1] > 0.7]\n", + "#convert y to Vs\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 0.05 
>= e[0] >= -np.pi/2 - epsilon and e[1] > 0.7]\n", + "\n", + "# Create a filled contour plot\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.tricontourf(y, x, z, cmap=cmap, levels=18) # Change 'viridis' to any other colormap you like\n", + "plt.colorbar(label='', shrink=0.8, ) # Add color bar for the z values\n", + "\n", + "# Add labels and a title\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", + "\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "\n", + "# Set the minor ticks for the grid, keeping the dense grid with smaller squares\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True) # 60 minor ticks for the grid\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True) # 60 minor ticks for the grid\n", + "# vertical line at V/Vs = 1\n", + "plt.axvline(x=1, color='r', linestyle='--', linewidth=0.5)\n", + "# horizontal line at γ = 0\n", + "#plt.axhline(y=0, color='k', linestyle='--')\n", + "\n", + "# Major tick labels\n", + "vals = np.round(np.linspace(-90, 0, 6))\n", + "plt.yticks(vals) # 6 labels on the y-axis\n", + "plt.xticks(np.linspace(round(y_min), round(y_max), 6)) # 6 labels on the x-axis\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)\n", + "\n", + "# Save the figure without extra white margins\n", + "plt.savefig('output.png', bbox_inches='tight')\n", + "\n", + "# set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8416867", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy.interpolate import griddata\n", + "\n", + "# Your dictionary with keys as coordinates (x, y) and values as the color intensity (z)\n", + "# Extracting the x, y and z values\n", + "x = [np.degrees(coord[0]) for coord in dict_result.keys() if 0.01 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "y = [coord[1] for coord in dict_result.keys() if 0.01 >= coord[0] >= -np.pi/2 and coord[1] > 0.7]\n", + "keys_list = list(dict_result.keys())\n", + "values_list = list(dict_result.values())\n", + "z = [v for e,v in zip(keys_list, values_list) if 0.01 >= e[0] >= -np.pi/2 and e[1] > 0.7]\n", + "\n", + "# Create a grid for interpolation\n", + "x_grid = np.linspace(min(x), max(x), 100)\n", + "y_grid = np.linspace(min(y), max(y), 100)\n", + "X, Y = np.meshgrid(x_grid, y_grid)\n", + "\n", + "# Interpolate the scattered data into the grid\n", + "Z = griddata((y, x), z, (Y, X), method='linear')\n", + "\n", + "# Create the scatter plot\n", + "cmap = plt.get_cmap('viridis', 2048)\n", + "plt.scatter(y, x, c=z, cmap=cmap)\n", + "\n", + "# Add color bar\n", + "plt.colorbar(label='', shrink=0.8)\n", + "contour_levels = np.linspace(np.min(z), np.max(z), 10) # 10 levels\n", + "contour_levels = contour_levels[contour_levels != 0] # Remove zero from levels\n", + "\n", + "# Add contour lines on top of the scatter plot\n", + "contour = plt.contour(Y, X, Z, levels=contour_levels, colors='black', linewidths=0.75)\n", + "plt.clabel(contour, inline=True, fontsize=8) # Optional: add labels to the contours\n", + "\n", + "# Add labels and a title\n", + "plt.ylabel('Flight Path Angle (γ) [deg]')\n", + "plt.xlabel('V/Vs')\n", + "\n", + "# Plot vertical line at x=1\n",
"plt.axvline(x=1, color='k', linestyle='--')\n", + "\n", + "# Configure the minor ticks and grid\n", + "x_min, x_max = min(x), max(x)\n", + "y_min, y_max = min(y), max(y)\n", + "ax = plt.gca() # Get current axes\n", + "ax.set_yticks(np.linspace(x_min, x_max, 60), minor=True)\n", + "ax.set_xticks(np.linspace(y_min, y_max, 60), minor=True)\n", + "plt.yticks(np.linspace(x_min, x_max, 6)) # Major ticks on y-axis\n", + "plt.xticks(np.linspace(y_min, y_max, 4)) # Major ticks on x-axis\n", + "\n", + "# Set size of the plot\n", + "plt.gcf().set_size_inches(9, 5)\n", + "\n", + "# Enable the minor grid lines\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Habilitar la cuadrícula en las marcas menores\n", + "ax.grid(which='minor', color='white', linestyle='-', linewidth=0.25)\n", + "\n", + "# Save the image\n", + "plt.savefig('output_with_contours.png', bbox_inches='tight')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, { "cell_type": "markdown", "id": "3c8fe4a4", @@ -164,6 +545,16 @@ "\n", "test_enviroment(Continuous_MountainCarEnv(render_mode=\"human\"), pi)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "babe9f7f", + "metadata": {}, + "outputs": [], + "source": [ + "import symbo" + ] } ], "metadata": {