diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..c387ff5b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+fail_fast: false
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.5.0
+  hooks:
+  - id: check-yaml
+    exclude: "docs"
+  - id: end-of-file-fixer
+    exclude: "docs"
+  - id: trailing-whitespace
+    exclude: "docs"
+
+- repo: https://github.com/psf/black
+  rev: 23.10.0
+  hooks:
+  - id: black
+    args: [ '--config', 'pyproject.toml' ]
+    verbose: true
diff --git a/Dockerfile b/Dockerfile
index 5ab8844e..e5e38efd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,4 +17,4 @@ RUN cd pomdp-py/ && pip install -e.
 WORKDIR /app/pomdp-py

 # activate 'pomdp' environment by default
-RUN echo "conda activate pomdp" >> ~/.bashrc
\ No newline at end of file
+RUN echo "conda activate pomdp" >> ~/.bashrc
diff --git a/LICENSE b/LICENSE
index 48e75b68..a76ea6b0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -6,4 +6,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
index a263dfd4..22339d43 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,3 @@ clean:
 .PHONY: build
 build:
 	python setup.py build_ext --inplace
-
-
-
diff --git a/pomdp_py/__init__.py b/pomdp_py/__init__.py
index b2ce26e2..057c552e 100644
--- a/pomdp_py/__init__.py
+++ b/pomdp_py/__init__.py
@@ -11,7 +11,12 @@ from pomdp_py.representations.distribution.gaussian import Gaussian
 from pomdp_py.representations.belief.histogram import update_histogram_belief
 from pomdp_py.representations.belief.particles import update_particles_belief
-from pomdp_py.utils.interfaces.conversion import to_pomdp_file, to_pomdpx_file, AlphaVectorPolicy, PolicyGraph
+from pomdp_py.utils.interfaces.conversion import (
+    to_pomdp_file,
+    to_pomdpx_file,
+    AlphaVectorPolicy,
+    PolicyGraph,
+)
 from pomdp_py.utils.interfaces.solvers import vi_pruning, sarsop

 # Algorithms
@@ -19,8 +24,15 @@ from pomdp_py.algorithms.value_function import value, qvalue, belief_update
 from pomdp_py.algorithms.pomcp import POMCP
 from pomdp_py.algorithms.po_rollout import PORollout
-from pomdp_py.algorithms.po_uct import POUCT, QNode, VNode, RootVNode,\
-    RolloutPolicy, RandomRollout, ActionPrior
+from pomdp_py.algorithms.po_uct import (
+    POUCT,
+    QNode,
+    VNode,
+    RootVNode,
+    RolloutPolicy,
+    RandomRollout,
+    ActionPrior,
+)
 from pomdp_py.algorithms.bsp.blqr import BLQR

 # Templates & Utilities
diff --git a/pomdp_py/__main__.py b/pomdp_py/__main__.py
index 832dd93d..7ca8e469 100644
--- a/pomdp_py/__main__.py
+++ b/pomdp_py/__main__.py
@@ -1,42 +1,47 @@
 import argparse

-available_problems = [
-    'tiger',
-    'rocksample',
-    'mos',
-    'tag',
-    'load_unload'
-]
+available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload"]
+

 def parse_args():
     parser = argparse.ArgumentParser(description="pomdp_py CLI")
-    parser.add_argument("-r", "--run", type=str,
-                        help="run a pomdp under pomdp_py.problems."
-                        "Available options: {}".format(available_problems))
+    parser.add_argument(
+        "-r",
+        "--run",
+        type=str,
+        help="run a pomdp under pomdp_py.problems. "
+ "Available options: {}".format(available_problems), + ) args = parser.parse_args() return parser, args + if __name__ == "__main__": parser, args = parse_args() if args.run: if args.run.lower() == "tiger": from pomdp_py.problems.tiger.tiger_problem import main + main() elif args.run.lower() == "rocksample": from pomdp_py.problems.rocksample.rocksample_problem import main + main() elif args.run.lower() == "mos": from pomdp_py.problems.multi_object_search.problem import unittest + unittest() elif args.run.lower() == "tag": from pomdp_py.problems.tag.problem import main + main() elif args.run.lower() == "load_unload": from pomdp_py.problems.load_unload.load_unload import main + main() else: diff --git a/pomdp_py/algorithms/bsp/blqr.py b/pomdp_py/algorithms/bsp/blqr.py index c39b6cc7..6059c959 100644 --- a/pomdp_py/algorithms/bsp/blqr.py +++ b/pomdp_py/algorithms/bsp/blqr.py @@ -6,21 +6,31 @@ import numpy as np from scipy import optimize -class BLQR(pomdp_py.Planner): - def __init__(self, - func_sysd, func_obs, jac_sysd, jac_obs, jac_sysd_u, - noise_obs, noise_sysd, - Qlarge, L, Q, R, - planning_horizon=15): +class BLQR(pomdp_py.Planner): + def __init__( + self, + func_sysd, + func_obs, + jac_sysd, + jac_obs, + jac_sysd_u, + noise_obs, + noise_sysd, + Qlarge, + L, + Q, + R, + planning_horizon=15, + ): """To initialize the planenr of B-LQR, one needs to supply parameters that describe the underlying linear gaussian system, and also matrices that describe the cost function. Note that math symbols - (e.g. xt, ut) are vectors or matrices represented as np.arrays. + (e.g. xt, ut) are vectors or matrices represented as np.arrays. The B-LQR cost function (Eq.14 in the paper): :math:`J(b_{t:T},u_{t:T}) = \bar{m}_T^TQ_{large}\bar{m}_T+\bar{s}_T^T\Lambda\bar{s}_T + \sum_{k=t}^{T-1}\bar{m}_k^TQ\bar{m}_k+\bar{u}_t^TR\bar{u}_t` - + Args: func_sysd (function): f: (xt, ut) -> xt+1 func_obs (function): g: (xt) -> zt @@ -49,7 +59,7 @@ def __init__(self, self._jac_obs = jac_obs self._jac_sysd = jac_sysd_u self._noise_obs = noise_obs - self._noise_sysd = noise_sysd + self._noise_sysd = noise_sysd self._Qlarge = Qlarge self._L = L # Lambda self._Q = Q @@ -58,7 +68,6 @@ def __init__(self, self._dim_state = self._Q.shape[0] self._dim_control = self._R.shape[0] - def ekf_update_mlo(self, bt, ut): """ Performs the ekf belief update assuming maximum likelihood observation. @@ -79,13 +88,15 @@ def ekf_update_mlo(self, bt, ut): observation function. This corresponds to Wt in equation 13. 
""" # TODO: FIX - mt, Cov_t = bt + mt, Cov_t = bt At = self._jac_sysd(mt, ut) Ct = self._jac_obs(mt) # based on maximum likelihood observation Wt = self._noise_obs(mt).cov Gat = np.dot(np.dot(At, Cov_t), At.transpose()) # Gamma_t = At*Cov_t*At^T CGC_W_inv = np.linalg.inv(np.dot(np.dot(Ct, Gat), Ct.transpose()) + Wt) - Cov_next = Gat - np.dot(np.dot(np.dot(np.dot(Gat, Ct.transpose()), CGC_W_inv), Ct), Gat) + Cov_next = Gat - np.dot( + np.dot(np.dot(np.dot(Gat, Ct.transpose()), CGC_W_inv), Ct), Gat + ) m_next = self._func_sysd(mt, ut) + self._noise_sysd(mt).random() return (m_next, Cov_next) @@ -94,13 +105,13 @@ def _b_u_seg(self, x, i): as a tuple (m_i, Covi, u_i)""" d = self._dim_state c = self._dim_control - start = i*(d + (d*d) + c) - end = (i+1)*(d + (d*d) + c) - m_i_end = i*(d + (d*d) + c) + d - Covi_end = i*(d + (d*d) + c) + d + d*d - u_end = i*(d + (d*d) + c) + d + d*d + c + start = i * (d + (d * d) + c) + end = (i + 1) * (d + (d * d) + c) + m_i_end = i * (d + (d * d) + c) + d + Covi_end = i * (d + (d * d) + c) + d + d * d + u_end = i * (d + (d * d) + c) + d + d * d + c return x[start:m_i_end], x[m_i_end:Covi_end], x[Covi_end:u_end] - + def _control_max_constraint(self, x, i, max_val): _, _, u_i = self._b_u_seg(x, i) return max_val - u_i @@ -110,9 +121,9 @@ def _control_min_constraint(self, x, i, min_val): return u_i - min_val def _mean_final_constraint(self, x, m_des, num_segments): - m_k, _, _ = self._b_u_seg(x, num_segments-1) + m_k, _, _ = self._b_u_seg(x, num_segments - 1) return m_k - m_des - + def _mean_start_constraint(self, x, m_0): m_k, _, _ = self._b_u_seg(x, 0) return m_k - m_0 @@ -121,14 +132,16 @@ def _belief_constraint(self, x, i, num_segments): m_i, s_i, u_i = self._b_u_seg(x, i) Cov_i = s_i.reshape(self._dim_state, self._dim_state).transpose() m_ip1, Cov_ip1 = self.integrate_belief_segment((m_i, Cov_i), u_i, num_segments) - s_ip1 = Cov_ip1.transpose().reshape(-1,) + s_ip1 = Cov_ip1.transpose().reshape( + -1, + ) b_i_vec = np.append(m_i, s_i) b_ip1_vec = np.append(m_ip1, s_ip1) return b_i_vec - b_ip1_vec def integrate_belief_segment(self, b_i, u_i, num_segments): """This is to represent equation 18. - + phi(b_i', u_i') = F(b_i', u_i') + sum F(b_{t+1}, u_i) - F(b_t, u_i) t in seg @@ -137,16 +150,15 @@ def integrate_belief_segment(self, b_i, u_i, num_segments): m_i, Cov_i = b_i b_seg = [self.ekf_update_mlo((m_i, Cov_i), u_i)] for t in range(1, self._planning_horizon // num_segments): - b_seg.append(self.ekf_update_mlo(b_seg[t-1], u_i)) + b_seg.append(self.ekf_update_mlo(b_seg[t - 1], u_i)) sum_mean = b_seg[0][0] sum_Cov = b_seg[0][1] for t in range(len(b_seg)): if t + 1 < len(b_seg) - 1: - sum_mean += (b_seg[t+1][0] - b_seg[t][0]) - sum_Cov += (b_seg[t+1][1] - b_seg[t][1]) + sum_mean += b_seg[t + 1][0] - b_seg[t][0] + sum_Cov += b_seg[t + 1][1] - b_seg[t][1] return sum_mean, sum_Cov - def _opt_cost_func_seg(self, x, b_des, u_des, num_segments): """This will be used as part of the objective function for scipy optimizer. Therefore we only take in one input vector, x. We require @@ -164,7 +176,7 @@ def _opt_cost_func_seg(self, x, b_des, u_des, num_segments): return self.segmented_cost_function(bu_traj, b_des, u_des, num_segments) def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments): - """The cost function in eq 17. + """The cost function in eq 17. Args: b_des (tuple): The desired belief (mT, CovT). This is needed to compute the cost function. 
@@ -172,13 +184,17 @@ def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments):
             If this information is not available, pass in an empty list.
         """
         if len(u_des) > 0 and len(u_des) != num_segments:
-            raise ValueError("The list of desired controls, if provided, must have one control"\
-                             "per segment")
+            raise ValueError(
+                "The list of desired controls, if provided, must have one control "
+                "per segment"
+            )
         b_T, u_T = bu_traj[-1]
         m_T, Cov_T = b_T
-        s_T = Cov_T.transpose().reshape(-1,)
-
+        s_T = Cov_T.transpose().reshape(
+            -1,
+        )
+
         sLs = np.dot(np.dot(s_T.transpose(), self._L), s_T)
         Summation = 0
         for i in range(num_segments):
@@ -190,12 +206,21 @@ def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments):
             else:
                 u_i_des = np.zeros(self._dim_control)
             u_i_diff = u_i - u_i_des
-            Summation += np.dot(np.dot(m_i_diff.transpose(), self._Q), m_i_diff)\
-                + np.dot(np.dot(u_i_diff.transpose(), self._R), u_i_diff)
+            Summation += np.dot(
+                np.dot(m_i_diff.transpose(), self._Q), m_i_diff
+            ) + np.dot(np.dot(u_i_diff.transpose(), self._R), u_i_diff)
         return sLs + Summation
-
-    def create_plan(self, b_0, b_des, u_init, num_segments=5, control_bounds=None,
-                    opt_options={}, opt_callback=None):
+
+    def create_plan(
+        self,
+        b_0,
+        b_des,
+        u_init,
+        num_segments=5,
+        control_bounds=None,
+        opt_options={},
+        opt_callback=None,
+    ):
         """Solves the SQP problem using direct transcription,
         and produce belief points and controls at segments.
         Reference: https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html"""
@@ -204,51 +229,66 @@ def create_plan(self, b_0, b_des, u_init, num_segments=5, control_bounds=None,
         b_i = b_0
         for i in range(num_segments):
             m_i, Cov_i = b_i
-            s_i = Cov_i.transpose().reshape(-1,)
+            s_i = Cov_i.transpose().reshape(
+                -1,
+            )
             u_i = u_init[i]
             x_0.extend(m_i)
             x_0.extend(s_i)
             x_0.extend(u_i)
             b_i = self.integrate_belief_segment(b_i, u_i, num_segments)

-        # constraints
+        # constraints
         constraints = []
         ## initial belief constraint
-        cons_start = {'type': 'eq',
-                      'fun': self._mean_start_constraint,
-                      'args': [b_0[0]]}
+        cons_start = {
+            "type": "eq",
+            "fun": self._mean_start_constraint,
+            "args": [b_0[0]],
+        }
         constraints.append(cons_start)
         ## belief dynamics constraints and control bounds
         for i in range(num_segments):
-            if i + 1 < num_segments-1:
-                cons_belief = {'type': 'eq',
-                               'fun': self._belief_constraint,
-                               'args': [i, num_segments]}
+            if i + 1 < num_segments - 1:
+                cons_belief = {
+                    "type": "eq",
+                    "fun": self._belief_constraint,
+                    "args": [i, num_segments],
+                }
                 constraints.append(cons_belief)
             # control bounds
             if control_bounds is not None:
-                cons_control_min = {'type': 'ineq',
-                                    'fun': self._control_min_constraint,
-                                    'args': [i, control_bounds[0]]}
-                cons_control_max = {'type': 'ineq',
-                                    'fun': self._control_max_constraint,
-                                    'args': [i, control_bounds[1]]}
+                cons_control_min = {
+                    "type": "ineq",
+                    "fun": self._control_min_constraint,
+                    "args": [i, control_bounds[0]],
+                }
+                cons_control_max = {
+                    "type": "ineq",
+                    "fun": self._control_max_constraint,
+                    "args": [i, control_bounds[1]],
+                }
                 constraints.append(cons_control_min)
                 constraints.append(cons_control_max)
         # final belief constraint
-        cons_final = {'type': 'eq',
-                      'fun': self._mean_final_constraint,
-                      'args': [b_des[0], num_segments]}
+        cons_final = {
+            "type": "eq",
+            "fun": self._mean_final_constraint,
+            "args": [b_des[0], num_segments],
+        }
         constraints.append(cons_final)
-        opt_res = optimize.minimize(self._opt_cost_func_seg, x_0,
-                                    method="SLSQP",
-                                    args=(b_des, u_init, num_segments),
-                                    constraints=constraints,
-                                    options=opt_options,
-                                    callback=opt_callback)
+        opt_res = optimize.minimize(
+            self._opt_cost_func_seg,
+            x_0,
+            method="SLSQP",
+            args=(b_des, u_init, num_segments),
+            constraints=constraints,
+            options=opt_options,
+            callback=opt_callback,
+        )
         return opt_res

     def interpret_sqp_plan(self, opt_res, num_segments):
diff --git a/pomdp_py/algorithms/pomcp.pxd b/pomdp_py/algorithms/pomcp.pxd
index 74212a9b..b3d48fe9 100644
--- a/pomdp_py/algorithms/pomcp.pxd
+++ b/pomdp_py/algorithms/pomcp.pxd
@@ -5,4 +5,3 @@ cdef class VNodeParticles(VNode):
     cdef public Particles belief
 cdef class RootVNodeParticles(RootVNode):
     cdef public Particles belief
-
diff --git a/pomdp_py/algorithms/value_function.py b/pomdp_py/algorithms/value_function.py
index f300b805..7b96c88e 100644
--- a/pomdp_py/algorithms/value_function.py
+++ b/pomdp_py/algorithms/value_function.py
@@ -2,8 +2,10 @@
 for small POMDPs with enumerable S, A, O spaces. This can also be used
 as an exact value iteration algorithm.
 """
+
 import pomdp_py

+
 def value(b, *args, horizon=1):
     """
     Computes the value of a POMDP at belief state b
@@ -18,9 +20,18 @@ def value(b, *args, horizon=1):
         return _value(b, *args, horizon=horizon)
     else:
         pomdp = args[0]
-        return _value(b, pomdp["S"], pomdp["A"], pomdp["Z"],
-                      pomdp["T"], pomdp["O"], pomdp["R"], pomdp["gamma"],
-                      horizon=horizon)
+        return _value(
+            b,
+            pomdp["S"],
+            pomdp["A"],
+            pomdp["Z"],
+            pomdp["T"],
+            pomdp["O"],
+            pomdp["R"],
+            pomdp["gamma"],
+            horizon=horizon,
+        )
+

 def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
     """
@@ -44,7 +55,7 @@ def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
     assert type(S) == list, "S must be a list"
     assert type(A) == list, "A must be a list"
     assert type(Z) == list, "Z must be a list"
-    max_qvalue = float('-inf')
+    max_qvalue = float("-inf")
     for a in A:
         # Compute Q(b,a)
         qv = qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=horizon)
@@ -52,6 +63,7 @@ def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
             max_qvalue = qv
     return max_qvalue

+
 def qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=1):
     """Compute Q(b,a)"""
     r = expected_reward(b, R, a, T)
@@ -64,13 +76,13 @@ def qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=1):
         # If o has non-zero probability
         if prob_o > 0:
             next_b = belief_update(b, a, o, T, O)
-            next_value = value(next_b, S, A, Z, T, O, R, gamma,
-                               horizon=horizon-1)
+            next_value = value(next_b, S, A, Z, T, O, R, gamma, horizon=horizon - 1)
             value_o = prob_o * next_value
             expected_future_value += value_o

     return r + gamma * expected_future_value

+
 def expected_reward(b, R, a, T=None):
     """Returns the expected reward at a given belief"""
     r = 0.0
@@ -82,6 +94,7 @@ def expected_reward(b, R, a, T=None):
             r += b[s] * R.sample(s, a, None)
     return r

+
 def belief_observation_model(o, b, a, T, O):
     """Returns the probability of Pr(o|b,a)"""
     prob = 0.0
@@ -92,6 +105,7 @@ def belief_observation_model(o, b, a, T, O):
             prob += obsrv_prob * trans_prob * b[s]
     return prob

+
 def belief_update(b, a, o, T, O):
     """Returns the updated belief of `b` given
     action `a` and observation `o`."""
@@ -116,9 +130,14 @@ def belief_update(b, a, o, T, O):
 def _create_case(noise=0.15, init_state="tiger-left"):
     """Create a tiger problem instance with uniform belief"""
     from pomdp_problems.tiger.tiger_problem import TigerProblem, TigerState
-    tiger = TigerProblem(noise, TigerState(init_state),
-                         pomdp_py.Histogram({TigerState("tiger-left"): 0.5,
-                                             TigerState("tiger-right"): 0.5}))
+
+    tiger = TigerProblem(
+        noise,
+        TigerState(init_state),
+        pomdp_py.Histogram(
{TigerState("tiger-left"): 0.5, TigerState("tiger-right"): 0.5} + ), + ) T = tiger.agent.transition_model O = tiger.agent.observation_model S = list(T.get_all_states()) @@ -131,9 +150,9 @@ def _create_case(noise=0.15, init_state="tiger-left"): s0 = tiger.env.state return b0, s0, S, A, Z, T, O, R, gamma + def _test_basic(): - b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, - init_state="tiger-left") + b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, init_state="tiger-left") horizon = 3 qvs = {} for a in A: @@ -145,8 +164,7 @@ def _test_basic(): def _test_planning(): - b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, - init_state="tiger-left") + b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, init_state="tiger-left") horizon = 3 # Do planning with qvalue @@ -156,7 +174,7 @@ def _test_planning(): _actions = [] for step in range(10): a_best = None - maxq = float('-inf') + maxq = float("-inf") for a in A: q = qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=horizon) if maxq < q: @@ -165,8 +183,11 @@ def _test_planning(): sp = T.sample(s, a_best) o = O.sample(sp, a_best) b = belief_update(b, a_best, o, T, O) - print("[Step {}] action={}, value={}, observation={}, belief={}"\ - .format(step, a_best, maxq, o, b)) + print( + "[Step {}] action={}, value={}, observation={}, belief={}".format( + step, a_best, maxq, o, b + ) + ) _actions.append(a_best.name) assert "open-right" in _actions print("Pass.") diff --git a/pomdp_py/framework/__init__.py b/pomdp_py/framework/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/framework/__init__.py +++ b/pomdp_py/framework/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/framework/planner.pxd b/pomdp_py/framework/planner.pxd index f46a296f..86e56397 100644 --- a/pomdp_py/framework/planner.pxd +++ b/pomdp_py/framework/planner.pxd @@ -4,4 +4,3 @@ cdef class Planner: cpdef public plan(self, Agent agent) cpdef public update(self, Agent agent, Action real_action, Observation real_observation) - diff --git a/pomdp_py/problems/light_dark/agent/belief.py b/pomdp_py/problems/light_dark/agent/belief.py index 689de8a9..409dacec 100644 --- a/pomdp_py/problems/light_dark/agent/belief.py +++ b/pomdp_py/problems/light_dark/agent/belief.py @@ -30,7 +30,7 @@ # def __init__(self, mean, sigma): # """initialize isotropic Gaussian distribution. - + # Args: # mean (list): 2d vector of robot position # sigma (float): sigma defining the standard deviation of the Gaussian. @@ -41,12 +41,12 @@ # super().__init__(mean, # [[sigma**2, 0, # 0, sigma**2]]) - + # @property # def sigma(self): # return self._sigma - + # class BeliefState(pomdp_py.State): # """Belief state; @@ -68,19 +68,18 @@ # @property # def belief(self): # return self._belief - + # def __hash__(self): # return self._hashcode - + # def __eq__(self, other): # if isinstance(other, BeliefState): # return self._belief == other.belief # else: # return False - + # def __str__(self): # return self.__repr__() - + # def __repr__(self): # return "BeliefState(%s)" % (str(self._belief.mean, self._belief.sigma)) - diff --git a/pomdp_py/problems/light_dark/domain/action.py b/pomdp_py/problems/light_dark/domain/action.py index 575e478e..7c511161 100644 --- a/pomdp_py/problems/light_dark/domain/action.py +++ b/pomdp_py/problems/light_dark/domain/action.py @@ -2,16 +2,19 @@ Origin: Belief space planning assuming maximum likelihood observations -Action space: +Action space: :math:`U\subseteq\mathbb{R}^2`. 
 Quote from the paper:

     "The robot is modeled as a first-order system such that the robot velocity is determined by the control actions, :math:`u\in\mathbb{R}^2`.
 """
+
 import pomdp_py

+
 class Action(pomdp_py.Action):
     """The action is a vector of velocities"""
+
     def __init__(self, control):
         """
         Initializes a state in light dark domain.
@@ -20,21 +23,20 @@ def __init__(self, control):
             control (tuple): velocity
         """
         if len(control) != 2:
-            raise ValueError("Action control must be a vector of length 2")
+            raise ValueError("Action control must be a vector of length 2")
         self.control = control

     def __hash__(self):
         return hash(self.control)
-
+
     def __eq__(self, other):
         if isinstance(other, Action):
             return self.control == other.control
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "Action(%s)" % (str(self.control))
-
diff --git a/pomdp_py/problems/light_dark/domain/observation.py b/pomdp_py/problems/light_dark/domain/observation.py
index 79157252..5ebfd469 100644
--- a/pomdp_py/problems/light_dark/domain/observation.py
+++ b/pomdp_py/problems/light_dark/domain/observation.py
@@ -1,17 +1,19 @@
 import pomdp_py

+
 class Observation(pomdp_py.Observation):
     """Defines the Observation for the continuous light-dark domain;

-    Observation space:
+    Observation space:

         :math:`\Omega\subseteq\mathbb{R}^2` the observation of the robot is
             an estimate of the robot position :math:`g(x_t)\in\Omega`.

     """
+
     # the number of decimals to round up an observation when it is discrete.
-    PRECISION=2
-
+    PRECISION = 2
+
     def __init__(self, position, discrete=False):
         """
         Initializes an observation in light dark domain.
@@ -25,23 +27,25 @@ def __init__(self, position, discrete=False):
         if self._discrete:
             self.position = position
         else:
-            self.position = (round(position[0], Observation.PRECISION),
-                             round(position[1], Observation.PRECISION))
+            self.position = (
+                round(position[0], Observation.PRECISION),
+                round(position[1], Observation.PRECISION),
+            )

     def discretize(self):
         return Observation(self.position, discrete=True)

     def __hash__(self):
         return hash(self.position)
-
+
     def __eq__(self, other):
         if isinstance(other, Observation):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "Observation(%s)" % (str(self.position))
diff --git a/pomdp_py/problems/light_dark/domain/state.py b/pomdp_py/problems/light_dark/domain/state.py
index 07e29e4b..a73d3cb3 100644
--- a/pomdp_py/problems/light_dark/domain/state.py
+++ b/pomdp_py/problems/light_dark/domain/state.py
@@ -2,15 +2,18 @@

 Origin: Belief space planning assuming maximum likelihood observations

-State space:
+State space:

     :math:`X\subseteq\mathbb{R}^2` the state of the robot
 """
+
 import pomdp_py
 import numpy as np

+
 class State(pomdp_py.State):
     """The state of the problem is just the robot position"""
+
     def __init__(self, position):
         """
         Initializes a state in light dark domain.
@@ -24,15 +27,15 @@ def __init__(self, position):

     def __hash__(self):
         return hash(self.position)
-
+
     def __eq__(self, other):
         if isinstance(other, State):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "State(%s)" % (str(self.position))
diff --git a/pomdp_py/problems/light_dark/env/env.py b/pomdp_py/problems/light_dark/env/env.py
index da4930dd..d88942bc 100644
--- a/pomdp_py/problems/light_dark/env/env.py
+++ b/pomdp_py/problems/light_dark/env/env.py
@@ -7,13 +7,9 @@ import pomdp_py.problems.light_dark as ld
 import numpy as np

-class LightDarkEnvironment(pomdp_py.Environment):
-    def __init__(self,
-                 init_state,
-                 light,
-                 const,
-                 reward_model=None):
+
+class LightDarkEnvironment(pomdp_py.Environment):
+    def __init__(self, init_state, light, const, reward_model=None):
         """
         Args:
             init_state (light_dark.domain.State or np.ndarray):
@@ -33,9 +29,7 @@ def __init__(self,
             transition_model = ld.TransitionModel()
         if type(init_state) == np.ndarray:
             init_state = ld.State(init_state)
-        super().__init__(init_state,
-                         transition_model,
-                         reward_model)
+        super().__init__(init_state, transition_model, reward_model)

     @property
     def light(self):
diff --git a/pomdp_py/problems/light_dark/env/plotting.py b/pomdp_py/problems/light_dark/env/plotting.py
index f347dac0..4728b9b1 100644
--- a/pomdp_py/problems/light_dark/env/plotting.py
+++ b/pomdp_py/problems/light_dark/env/plotting.py
@@ -1,9 +1,21 @@
 """Plotting utilties"""
+
 import matplotlib.pyplot as plt

-def plot_points(xvals, yvals, color=None,
-                size=1.5, label=None, connected=True, style="--", linewidth=1.5,
-                xlabel='x', ylabel='f(x)', loc="lower right"):
+
+def plot_points(
+    xvals,
+    yvals,
+    color=None,
+    size=1.5,
+    label=None,
+    connected=True,
+    style="--",
+    linewidth=1.5,
+    xlabel="x",
+    ylabel="f(x)",
+    loc="lower right",
+):
     if not connected:
         plt.scatter(xvals, yvals, s=size, c=color, label=label)
     else:
@@ -27,34 +39,71 @@ def plot_polygons(verts, colors, ax=None, edgecolor=None):
     Creates a PolygonCollection object in the axis `ax`."""
     if ax is None:
         fig = plt.gcf()
-        ax = fig.add_subplot(1,1,1)
+        ax = fig.add_subplot(1, 1, 1)
     pc = PolyCollection(verts)
     pc.set_edgecolor(edgecolor)
     pc.set_facecolor(colors)
     ax.add_collection(pc)
-    ax.set_xlabel('X axis')
-    ax.set_ylabel('Y axis')
+    ax.set_xlabel("X axis")
+    ax.set_ylabel("Y axis")


-def plot_line(ax, p1, p2,
-              linewidth=1, color='black', zorder=0, alpha=1.0, linestyle="-"):
+def plot_line(
+    ax, p1, p2, linewidth=1, color="black", zorder=0, alpha=1.0, linestyle="-"
+):
     p1x, p1y = p1
     p2x, p2y = p2
-    line = lines.Line2D([p1x, p2x], [p1y, p2y],
-                        linewidth=linewidth, color=color, zorder=zorder,
-                        alpha=alpha, linestyle=linestyle)
+    line = lines.Line2D(
+        [p1x, p2x],
+        [p1y, p2y],
+        linewidth=linewidth,
+        color=color,
+        zorder=zorder,
+        alpha=alpha,
+        linestyle=linestyle,
+    )
     ax.add_line(line)

-def plot_circle(ax, center, radius, color="blue",
-                fill=False, zorder=0, linewidth=0,
-                edgecolor=None, label_text=None,
-                alpha=1.0, text_color="white"):
+
+def plot_circle(
+    ax,
+    center,
+    radius,
+    color="blue",
+    fill=False,
+    zorder=0,
+    linewidth=0,
+    edgecolor=None,
+    label_text=None,
+    alpha=1.0,
+    text_color="white",
+):
     px, py = center
-    circ = plt.Circle((px, py), radius, facecolor=color, fill=fill,
-                      zorder=zorder, linewidth=linewidth, edgecolor=edgecolor, alpha=alpha)
+    circ = plt.Circle(
+        (px, py),
+        radius,
+        facecolor=color,
+        fill=fill,
+        zorder=zorder,
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        alpha=alpha,
+    )
     ax.add_artist(circ)
     if label_text:
-        text = ax.text(px, py, label_text, color=text_color,
-                       ha='center', va='center', size=7, weight='bold')
-        text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='black'),
-                               path_effects.Normal()])
+        text = ax.text(
+            px,
+            py,
+            label_text,
+            color=text_color,
+            ha="center",
+            va="center",
+            size=7,
+            weight="bold",
+        )
+        text.set_path_effects(
+            [
+                path_effects.Stroke(linewidth=1, foreground="black"),
+                path_effects.Normal(),
+            ]
+        )
diff --git a/pomdp_py/problems/light_dark/env/visual.py b/pomdp_py/problems/light_dark/env/visual.py
index 0e497a5d..625e1ff2 100644
--- a/pomdp_py/problems/light_dark/env/visual.py
+++ b/pomdp_py/problems/light_dark/env/visual.py
@@ -1,10 +1,12 @@
 """Plot the light dark environment"""
+
 import matplotlib.pyplot as plt
 from matplotlib.collections import PolyCollection
 import pomdp_py.problems.light_dark as ld
 from pomdp_py.utils import plotting, colors
 from pomdp_py.utils.misc import remap

+
 class LightDarkViz:
     """This class deals with visualizing a light dark domain"""

@@ -22,7 +24,7 @@ def __init__(self, env, x_range, y_range, res):
         self._x_range = x_range
         self._y_range = y_range
         fig = plt.gcf()
-        self._ax = fig.add_subplot(1,1,1)
+        self._ax = fig.add_subplot(1, 1, 1)
         self._goal_pos = None
         self._m_0 = None  # initial belief pose

@@ -40,10 +42,12 @@ def set_goal(self, goal_pos):
     def set_initial_belief_pos(self, m_0):
         self._m_0 = m_0

-    def plot(self,
-             path_colors={0: [(0,0,0), (0,0,254)]},
-             path_styles={0: "--"},
-             path_widths={0: 1}):
+    def plot(
+        self,
+        path_colors={0: [(0, 0, 0), (0, 0, 254)]},
+        path_styles={0: "--"},
+        path_widths={0: 1},
+    ):
         self._plot_gradient()
         self._plot_path(path_colors, path_styles, path_widths)
         self._plot_robot()
@@ -52,40 +56,53 @@ def plot(self,

     def _plot_robot(self):
         cur_pos = self._env.state.position
-        plotting.plot_circle(self._ax, cur_pos,
-                             0.25,  # tentative
-                             color="black", fill=False,
-                             linewidth=1, edgecolor="black",
-                             zorder=3)
+        plotting.plot_circle(
+            self._ax,
+            cur_pos,
+            0.25,  # tentative
+            color="black",
+            fill=False,
+            linewidth=1,
+            edgecolor="black",
+            zorder=3,
+        )

     def _plot_initial_belief_pos(self):
         if self._m_0 is not None:
-            plotting.plot_circle(self._ax, self._m_0,
-                                 0.25,  # tentative
-                                 color="black", fill=False,
-                                 linewidth=1, edgecolor="black",
-                                 zorder=3)
+            plotting.plot_circle(
+                self._ax,
+                self._m_0,
+                0.25,  # tentative
+                color="black",
+                fill=False,
+                linewidth=1,
+                edgecolor="black",
+                zorder=3,
+            )

     def _plot_goal(self):
         if self._goal_pos is not None:
-            plotting.plot_circle(self._ax,
-                                 self._goal_pos,
-                                 0.25,  # tentative
-                                 linewidth=1, edgecolor="blue",
-                                 zorder=3)
+            plotting.plot_circle(
+                self._ax,
+                self._goal_pos,
+                0.25,  # tentative
+                linewidth=1,
+                edgecolor="blue",
+                zorder=3,
+            )

     def _plot_path(self, colors, styles, linewidths):
         """Plot robot path"""
         # Plot line segments for path
         for path in self._log_paths:
             if path not in colors:
-                path_color = [(0,0,0)] * len(self._log_paths[path])
+                path_color = [(0, 0, 0)] * len(self._log_paths[path])
             else:
                 if len(colors[path]) == 2:
                     c1, c2 = colors[path]
-                    path_color = colors.linear_color_gradient(c1, c2,
-                                                              len(self._log_paths[path]),
-                                                              normalize=True)
+                    path_color = colors.linear_color_gradient(
+                        c1, c2, len(self._log_paths[path]), normalize=True
+                    )
                 else:
                     path_color = [colors[path]] * len(self._log_paths[path])

@@ -100,13 +117,22 @@ def _plot_path(self, colors, styles, linewidths):
                 path_width = linewidths[path]

             for i in range(1, len(self._log_paths[path])):
-                p1 = self._log_paths[path][i-1]
+                p1 = self._log_paths[path][i - 1]
                 p2 = self._log_paths[path][i]
                 try:
-                    plotting.plot_line(self._ax, p1, p2, color=path_color[i],
-                                       linestyle=path_style, zorder=2, linewidth=path_width)
+                    plotting.plot_line(
+                        self._ax,
+                        p1,
+                        p2,
+                        color=path_color[i],
+                        linestyle=path_style,
+                        zorder=2,
+                        linewidth=path_width,
+                    )
                 except Exception:
-                    import pdb; pdb.set_trace()
+                    import pdb
+
+                    pdb.set_trace()

     def _plot_gradient(self):
         """display the light dark domain."""
@@ -114,8 +140,10 @@ def _plot_gradient(self):
         ymin, ymax = self._y_range
         # Note that higher brightness has lower brightness value
         hi_brightness = self._env.const
-        lo_brightness = max(0.5 * (self._env.light - xmin)**2 + self._env.const,
-                            0.5 * (self._env.light - xmax)**2 + self._env.const)
+        lo_brightness = max(
+            0.5 * (self._env.light - xmin) ** 2 + self._env.const,
+            0.5 * (self._env.light - xmax) ** 2 + self._env.const,
+        )
         # Plot a bunch of rectangular strips along the x axis
         # Check out: https://stackoverflow.com/questions/10550477
         x = xmin
@@ -125,9 +153,11 @@ def _plot_gradient(self):
             x_next = x + self._res
             verts.append([(x, ymin), (x_next, ymin), (x_next, ymax), (x, ymax)])
             # compute brightness based on equation in the paper
-            brightness = 0.5 * (self._env.light - x)**2 + self._env.const
+            brightness = 0.5 * (self._env.light - x) ** 2 + self._env.const
             # map brightness to a grayscale color
-            grayscale = int(round(remap(brightness, hi_brightness, lo_brightness, 255, 0)))
+            grayscale = int(
+                round(remap(brightness, hi_brightness, lo_brightness, 255, 0))
+            )
             grayscale_hex = colors.rgb_to_hex((grayscale, grayscale, grayscale))
             colors.append(grayscale_hex)
             x = x_next
@@ -135,15 +165,15 @@ def _plot_gradient(self):
         self._ax.set_xlim(xmin, xmax)
         self._ax.set_ylim(ymin, ymax)

+
 if __name__ == "__main__":
-    env = ld.LightDarkEnvironment(ld.State((0.5, 2.5)),  # init state
-                                  (1.5, -1),  # goal pose
-                                  5,  # light
-                                  1)  # const
+    env = ld.LightDarkEnvironment(
+        ld.State((0.5, 2.5)), (1.5, -1), 5, 1  # init state  # goal pose  # light
+    )  # const
     viz = LightDarkViz(env, (-1, 7), (-2, 4), 0.1)
-    viz.log_position((5,2))
-    viz.log_position((5,0))
-    viz.log_position((4,-1))
+    viz.log_position((5, 2))
+    viz.log_position((5, 0))
+    viz.log_position((4, -1))
     viz.plot()
     plt.show()
diff --git a/pomdp_py/problems/light_dark/models/observation_model.py b/pomdp_py/problems/light_dark/models/observation_model.py
index ce181252..e393eece 100644
--- a/pomdp_py/problems/light_dark/models/observation_model.py
+++ b/pomdp_py/problems/light_dark/models/observation_model.py
@@ -14,13 +14,14 @@
 state (i.e. robot position). The number 5 indicates the x-coordinate
 of the light bar as shown in the figure (Fig.1 of the paper).
""" + import pomdp_py import copy import numpy as np from ..domain.observation import * -class ObservationModel(pomdp_py.ObservationModel): +class ObservationModel(pomdp_py.ObservationModel): def __init__(self, light, const): """ `light` and `const` are parameters in @@ -34,12 +35,11 @@ def __init__(self, light, const): self._const = const def _compute_variance(self, pos): - return 0.5 * (self._light - pos[0])**2 + self._const + return 0.5 * (self._light - pos[0]) ** 2 + self._const def noise_covariance(self, pos): variance = self._compute_variance(pos) - return np.array([[variance, 0], - [0, variance]]) + return np.array([[variance, 0], [0, variance]]) def probability(self, observation, next_state, action): """ @@ -50,11 +50,11 @@ def probability(self, observation, next_state, action): if self._discrete: observation = observation.discretize() variance = self._compute_variance(next_state.position) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) - omega = (observation.position[0] - next_state.position[0], - observation.position[1] - next_state.position[1]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) + omega = ( + observation.position[0] - next_state.position[0], + observation.position[1] - next_state.position[1], + ) return gaussian_noise[omega] def sample(self, next_state, action, argmax=False): @@ -62,43 +62,40 @@ def sample(self, next_state, action, argmax=False): # Sample a position shift according to the gaussian noise. obs_pos = self.func(next_state.position, mpe=argmax) return Observation(tuple(obs_pos)) - + def argmax(self, next_state, action): return self.sample(next_state, action, argmax=True) def func(self): def g(xt, mpe=False): variance = self._compute_variance(xt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) if mpe: omega = gaussian_noise.mpe() else: omega = gaussian_noise.random() - return np.array([xt[0] + omega[0], - xt[1] + omega[1]]) + return np.array([xt[0] + omega[0], xt[1] + omega[1]]) + return g def jac_dx(self): def dgdx(mt): variance = self._compute_variance(mt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) omega = gaussian_noise.random() # manually compute the jacobian of d(x + omega)/dx - return np.array([[omega[0], mt[1] + omega[1]], - [mt[0] + omega[0], omega[1]]]) + return np.array( + [[omega[0], mt[1] + omega[1]], [mt[0] + omega[0], omega[1]]] + ) + return dgdx def func_noise(self): """Returns a function that returns a state-dependent Gaussian noise.""" + def fn(mt): variance = self._compute_variance(mt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) return gaussian_noise + return fn - diff --git a/pomdp_py/problems/light_dark/models/transition_model.py b/pomdp_py/problems/light_dark/models/transition_model.py index bbf52288..081c523b 100644 --- a/pomdp_py/problems/light_dark/models/transition_model.py +++ b/pomdp_py/problems/light_dark/models/transition_model.py @@ -8,6 +8,7 @@ :math:`f(x_t,u_t)=x_t+u`. This means the transition dynamics is deterministic. 
""" + import pomdp_py import copy import numpy as np @@ -17,6 +18,7 @@ class TransitionModel(pomdp_py.TransitionModel): """ The underlying deterministic system dynamics """ + def __init__(self, epsilon=1e-9): self._epsilon = epsilon @@ -43,34 +45,37 @@ def func(self): """Returns the function of the underlying system dynamics. The function is: (xt, ut) -> xt+1 where xt, ut, xt+1 are all numpy arrays.""" + def f(xt, ut): - return np.array([xt[0] + ut[0], - xt[1] + ut[1]]) + return np.array([xt[0] + ut[0], xt[1] + ut[1]]) + return f def jac_dx(self): """Returns the function of the jacobian of the system dynamics function with respect to the state vector mt: (mt, ut) -> At""" + def dfdx(mt, ut): # The result of computing the jacobian by hand - return np.array([[ut[0], mt[1] + ut[1]], - [mt[0] + ut[0], ut[1]]]) + return np.array([[ut[0], mt[1] + ut[1]], [mt[0] + ut[0], ut[1]]]) + return dfdx def jac_du(self): """Returns the function of the jacobian of the system dynamics function with respect to the state vector mt: (mt, ut) -> Bt""" + def dfdu(mt, ut): # The result of computing the jacobian by hand - return np.array([[mt[0], mt[1] + ut[1]], - [mt[0] + ut[0], mt[1]]]) + return np.array([[mt[0], mt[1] + ut[1]], [mt[0] + ut[0], mt[1]]]) + return dfdu - + def func_noise(self, var_sysd=1e-9): """Returns a function that returns a state-dependent Gaussian noise.""" + def fn(mt): - gaussian_noise = pomdp_py.Gaussian([0,0], - [[var_sysd, 0], - [0, var_sysd]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[var_sysd, 0], [0, var_sysd]]) return gaussian_noise + return fn diff --git a/pomdp_py/problems/light_dark/problem_discrete.py b/pomdp_py/problems/light_dark/problem_discrete.py index 08275a3f..051b527f 100644 --- a/pomdp_py/problems/light_dark/problem_discrete.py +++ b/pomdp_py/problems/light_dark/problem_discrete.py @@ -1,4 +1,5 @@ import pomdp_py + class DiscreteLightDark(pomdp_py.POMDP): pass diff --git a/pomdp_py/problems/light_dark/test.py b/pomdp_py/problems/light_dark/test.py index 48137926..99210de1 100644 --- a/pomdp_py/problems/light_dark/test.py +++ b/pomdp_py/problems/light_dark/test.py @@ -23,9 +23,7 @@ var_sysd = 1e-9 # Environment. -env = ld.LightDarkEnvironment(x_0, # init state - light, # light - const) # const +env = ld.LightDarkEnvironment(x_0, light, const) # init state # light # const obsmodel = ld.ObservationModel(light, const) func_sysd = env.transition_model.func() @@ -37,28 +35,32 @@ noise_sysd = env.transition_model.func_noise(var_sysd) L = 200 -Q = np.array([[0.5, 0], - [0, 0.5]]) -R = np.array([[0.5, 0], - [0, 0.5]]) - -b_des = (goal_pos, - np.array([[1e-6, 0.0], - [0.0, 1e-6]])) +Q = np.array([[0.5, 0], [0, 0.5]]) +R = np.array([[0.5, 0], [0, 0.5]]) + +b_des = (goal_pos, np.array([[1e-6, 0.0], [0.0, 1e-6]])) # u_des = [0.5*np.random.rand(2) for _ in range(num_segments)] u_des = [[0.0, 0.0] for _ in range(num_segments)] -blqr = pomdp_py.BLQR(func_sysd, func_obs, jac_sysd, jac_obs, jac_sysd_u, - noise_obs, noise_sysd, - None, L, Q, R, - planning_horizon=planning_horizon) +blqr = pomdp_py.BLQR( + func_sysd, + func_obs, + jac_sysd, + jac_obs, + jac_sysd_u, + noise_obs, + noise_sysd, + None, + L, + Q, + R, + planning_horizon=planning_horizon, +) def manual_test(blqr): # Initial and final belief states. 
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
     print("Path through the light:")
     print(b_0[0])
     print(b_0[1])
@@ -91,11 +93,8 @@ def manual_test(blqr):
     print(b_1[0])
     print(b_1[1])

-
     # Initial and final belief states.
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
     print("Path directly to goal")
     print(b_0[0])
     print(b_0[1])
@@ -109,15 +108,17 @@ def manual_test(blqr):

     ####### FOR DEBUGGING
     # traj through light
-    u_traj_light = [[1.0, 0.0],
-                    [1.0, 0.0],
-                    [0.0, -1.0],
-                    [0.0, -1.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-0.99, 0.09]]
+    u_traj_light = [
+        [1.0, 0.0],
+        [1.0, 0.0],
+        [0.0, -1.0],
+        [0.0, -1.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-0.99, 0.09],
+    ]
     b_t = b_0
     b_traj_light = [b_t]
     for t in range(len(u_traj_light)):
@@ -125,10 +126,12 @@ def manual_test(blqr):
         b_tp1 = blqr.ekf_update_mlo(b_t, u_t)
         b_traj_light.append(b_tp1)
         b_t = b_tp1
-    bu_traj_light = [(b_traj_light[t], np.array(u_traj_light[t])) for t in range(len(u_traj_light))]
+    bu_traj_light = [
+        (b_traj_light[t], np.array(u_traj_light[t])) for t in range(len(u_traj_light))
+    ]

     # traj not through light
-    u_traj_dark = [[-1., -1.], [-1., -1.], [-1., -1.]]
+    u_traj_dark = [[-1.0, -1.0], [-1.0, -1.0], [-1.0, -1.0]]
     b_t = b_0
     b_traj_dark = [b_t]
     for t in range(len(u_traj_dark)):
@@ -136,18 +139,23 @@ def manual_test(blqr):
         b_tp1 = blqr.ekf_update_mlo(b_t, u_t)
         b_traj_dark.append(b_tp1)
         b_t = b_tp1
-    bu_traj_dark = [(b_traj_dark[t], np.array(u_traj_dark[t])) for t in range(len(u_traj_dark))]
-
+    bu_traj_dark = [
+        (b_traj_dark[t], np.array(u_traj_dark[t])) for t in range(len(u_traj_dark))
+    ]

     total_light = 0
     total_dark = 0
     for i in range(1000):
-        cost_light = blqr.segmented_cost_function(bu_traj_light, b_des, [], len(bu_traj_light))
-        cost_dark = blqr.segmented_cost_function(bu_traj_dark, b_des, [], len(bu_traj_dark))
+        cost_light = blqr.segmented_cost_function(
+            bu_traj_light, b_des, [], len(bu_traj_light)
+        )
+        cost_dark = blqr.segmented_cost_function(
+            bu_traj_dark, b_des, [], len(bu_traj_dark)
+        )
         total_light += cost_light
         total_dark += cost_dark
-    print("avg cost light: %.3f" % (total_light/1000.0))
-    print("avg cost dark: %.3f" % (total_dark/1000.0))
+    print("avg cost light: %.3f" % (total_light / 1000.0))
+    print("avg cost dark: %.3f" % (total_dark / 1000.0))

     x_range = (-1, 7)
     y_range = (-2, 4)
@@ -173,18 +181,18 @@ def manual_test(blqr):
         viz.log_position(tuple(sysd_b_dark[-1][0]), path=5)
         sysd_b_dark.append((func_sysd(sysd_b_dark[-1][0], u_t), 0))

-    viz.plot(path_colors={2: [(0,0,0), (0,255,0)],
-                          3: [(0,0,0), (0,255,255)],
-                          4: [(0,0,0), (255,255,0)],
-                          5: [(0,0,0), (255,0,255)]},
-             path_styles={2: "--",
-                          3: "-",
-                          4: "--",
-                          5: "-"})
+    viz.plot(
+        path_colors={
+            2: [(0, 0, 0), (0, 255, 0)],
+            3: [(0, 0, 0), (0, 255, 255)],
+            4: [(0, 0, 0), (255, 255, 0)],
+            5: [(0, 0, 0), (255, 0, 255)],
+        },
+        path_styles={2: "--", 3: "-", 4: "--", 5: "-"},
+    )
     plt.show()

-
 def opt_callback(xk, *args):
     global count
     print("Iteration %d" % count)
@@ -195,14 +203,16 @@ def opt_callback(xk, *args):

 def test(blqr):
     ############
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
-    x_sol = blqr.create_plan(b_0, b_des, u_des,
-                             num_segments=num_segments,
-                             opt_options={"maxiter": 30},
-                             opt_callback=opt_callback,
-                             control_bounds=(-0.1, 0.1))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
+    x_sol = blqr.create_plan(
+        b_0,
+        b_des,
+        u_des,
+        num_segments=num_segments,
+        opt_options={"maxiter": 30},
+        opt_callback=opt_callback,
+        control_bounds=(-0.1, 0.1),
+    )
     with np.printoptions(precision=3, suppress=True):
         print("SLSQP solution:")
         print(x_sol)
@@ -221,17 +231,14 @@ def test(blqr):
     for m_i, _, _ in plan:
         viz.log_position(tuple(m_i), path=0)

-    viz.plot(path_colors={0: [(0,0,0), (0,255,0)],
-                          1: [(0,0,0), (255,0,0)]},
-             path_styles={0: "--",
-                          1: "-"},
-             path_widths={0: 4,
-                          1: 1})
+    viz.plot(
+        path_colors={0: [(0, 0, 0), (0, 255, 0)], 1: [(0, 0, 0), (255, 0, 0)]},
+        path_styles={0: "--", 1: "-"},
+        path_widths={0: 4, 1: 1},
+    )
     plt.show()

-
-
 if __name__ == "__main__":
     test(blqr)
     # bt = (np.array([1.3, 2.0]), np.array([[0.5, 0.0], [0.0, 0.5]]))
diff --git a/pomdp_py/problems/load_unload/load_unload.py b/pomdp_py/problems/load_unload/load_unload.py
index b420e195..197ea5db 100644
--- a/pomdp_py/problems/load_unload/load_unload.py
+++ b/pomdp_py/problems/load_unload/load_unload.py
@@ -25,14 +25,17 @@
 EPSILON = 1e-3
 LOAD_LOCATION = 10

+
 class LUState(pomdp_py.State):
     def __init__(self, x, loaded):
         if type(x) != int or x < 0:
-            raise ValueError("Invalid state: {}\n".format((x, loaded)) +
-                             "x must be an integer > 0")
+            raise ValueError(
+                "Invalid state: {}\n".format((x, loaded)) + "x must be an integer > 0"
+            )
         if type(loaded) != bool:
-            raise ValueError("Invalid state: {}\n".format((x, loaded)) +
-                             "loaded must be a boolean")
+            raise ValueError(
+                "Invalid state: {}\n".format((x, loaded)) + "loaded must be a boolean"
+            )
         if x == 0 and loaded == True:
             raise ValueError("Agent can not be loaded in the 0th position")
         if x == LOAD_LOCATION and loaded == False:
@@ -40,58 +43,76 @@ def __init__(self, x, loaded):

         self.x = x
         self.loaded = loaded
+
     def __hash__(self):
         return hash((self.x, self.loaded))
+
     def __eq__(self, other):
         if isinstance(other, LUState):
             return self.x == other.x and self.loaded == other.loaded
         elif type(other) == tuple:
             return self.x == other[0] and self.loaded == other[1]
+
     def __str__(self):
         return str((self.x, self.loaded))
+
     def __repr__(self):
         return "State({})".format(self)

+
 class LUAction(pomdp_py.Action):
     def __init__(self, name):
         if name not in ["move-left", "move-right"]:
             raise ValueError("Invalid action: %s" % name)

         self.name = name
+
     def __hash__(self):
         return hash(self.name)
+
     def __eq__(self, other):
         if isinstance(other, LUAction):
             return self.name == other.name
         elif type(other) == str:
             return self.name == other
+
     def __str__(self):
         return self.name
+
     def __repr__(self):
         return "Action(%s)" % self.name

+
 class LUObservation(pomdp_py.Observation):
     def __init__(self, obs):
         if obs not in ["load", "unload", "middle"]:
-            raise ValueError("Invalid observation: {}\n".format(name) +
-                             "Observation must be an integer > 0")
+            raise ValueError(
+                "Invalid observation: {}\n".format(obs)
+                + "Observation must be an integer > 0"
+            )
         self.name = obs
+
     def __hash__(self):
         return hash(self.name)
+
     def __eq__(self, other):
         if isinstance(other, LUObservation):
             return self.name == other.name
         elif type(other) == str:
             return self.name == other
+
     def __str__(self):
         return str(self.name)
+
     def __repr__(self):
         return "Observation(%s)" % str(self.name)

+
 # Observation model
 class LUObservationModel(pomdp_py.ObservationModel):
     """This problem is small enough for the probabilities to be directly given
     externally"""
+
     def probability(self, observation, next_state, action, normalized=False, **kwargs):
         if observation != self.sample(next_state, action):
             # return EPSILON to avoid degradation of particles
@@ -115,7 +136,8 @@ def argmax(self, next_state, action, normalized=False, **kwargs):
 # Transition Model
 class LUTransitionModel(pomdp_py.TransitionModel):
     """This problem is small enough for the probabilities to be directly given
-    externally"""
+    externally"""
+
     def probability(self, next_state, state, action, normalized=False, **kwargs):
         if next_state != self.sample(state, action):
             return EPSILON
@@ -123,8 +145,9 @@ def probability(self, next_state, state, action, normalized=False, **kwargs):
         return 1 - EPSILON

     def sample(self, state, action, normalized=False, **kwargs):
-        if ((state.x == LOAD_LOCATION and action == "move-right") or
-            (state.x == 0 and action == "move-left")):
+        if (state.x == LOAD_LOCATION and action == "move-right") or (
+            state.x == 0 and action == "move-left"
+        ):
             # trying to make invalid move, stay in the same place
             return state

@@ -144,9 +167,12 @@ def argmax(self, state, action, normalized=False, **kwargs):
         """Returns the most likely next state"""
         return self.sample(state, action)

+
 # Reward Model
 class LURewardModel(pomdp_py.RewardModel):
-    def probability(self, reward, state, action, next_state, normalized=False, **kwargs):
+    def probability(
+        self, reward, state, action, next_state, normalized=False, **kwargs
+    ):
         if reward == self.sample(state, action):
             return 1.0
         else:
@@ -163,10 +189,12 @@ def argmax(self, state, action, next_state, normalized=False, **kwargs):
         """Returns the most likely reward"""
         return self.sample(state, action)

+
 # Policy Model
 class LUPolicyModel(pomdp_py.RandomRollout):
     """This is an extremely dumb policy model; To keep consistent
     with the framework."""
+
     def __init__(self):
         self._all_actions = {LUAction("move-right"), LUAction("move-left")}

@@ -185,21 +213,21 @@ def get_all_actions(self, **kwargs):


 class LoadUnloadProblem(pomdp_py.POMDP):
-
     def __init__(self, init_state, init_belief):
         """init_belief is a Distribution."""
-        agent = pomdp_py.Agent(init_belief,
-                               LUPolicyModel(),
-                               LUTransitionModel(),
-                               LUObservationModel(),
-                               LURewardModel())
+        agent = pomdp_py.Agent(
+            init_belief,
+            LUPolicyModel(),
+            LUTransitionModel(),
+            LUObservationModel(),
+            LURewardModel(),
+        )

-        env = pomdp_py.Environment(init_state,
-                                   LUTransitionModel(),
-                                   LURewardModel())
+        env = pomdp_py.Environment(init_state, LUTransitionModel(), LURewardModel())

         super().__init__(agent, env, name="LoadUnloadProblem")

+
 def generate_random_state():
     # Flip a coin to determine if we are loaded
     loaded = np.random.rand() > 0.5
@@ -210,6 +238,7 @@ def generate_random_state():
         loaded = True
     return LUState(location, loaded)

+
 def generate_init_belief(num_particles):
     particles = []
     for _ in range(num_particles):
@@ -217,6 +246,7 @@ def generate_init_belief(num_particles):

     return pomdp_py.Particles(particles)

+
 def test_planner(load_unload_problem, planner, nsteps=3, discount=0.95):
     gamma = 1.0
     total_reward = 0
@@ -227,21 +257,22 @@ def test_planner(load_unload_problem, planner, nsteps=3, discount=0.95):
     plt.xlabel("Position")

     ax = fig.add_subplot(111)
-    ax.set_xlim(-1, LOAD_LOCATION+1)
+    ax.set_xlim(-1, LOAD_LOCATION + 1)
     ax.set_ylim(0, 2)
     x, y = [], []
-    scat, = ax.plot(x, y, marker="x", markersize=20, ls=" ", color="black")
+    (scat,) = ax.plot(x, y, marker="x", markersize=20, ls=" ", color="black")

     def update(t):
         nonlocal gamma, total_reward, total_discounted_reward
-        print("==== Step %d ====" % (t+1))
+        print("==== Step %d ====" % (t + 1))
         action = planner.plan(load_unload_problem.agent)

         env_reward = load_unload_problem.env.state_transition(action, execute=True)
         true_state = copy.deepcopy(load_unload_problem.env.state)

         real_observation = load_unload_problem.env.provide_observation(
-            load_unload_problem.agent.observation_model, action)
+            load_unload_problem.agent.observation_model, action
+        )
         load_unload_problem.agent.update_history(action, real_observation)
         planner.update(load_unload_problem.agent, action, real_observation)
         total_reward += env_reward
@@ -263,21 +294,27 @@ def update(t):
         new_x, new_y = [true_state.x], [1]
         scat.set_data(new_x, new_y)
         scat.set_color("b" if true_state.loaded else "r")
-        return scat,
+        return (scat,)

     ani = FuncAnimation(fig, update, frames=nsteps, interval=500)
     plt.show()

+
 def main():
     init_state = generate_random_state()
     init_belief = generate_init_belief(num_particles=100)
     load_unload_problem = LoadUnloadProblem(init_state, init_belief)

     print("** Testing POMCP **")
-    pomcp = pomdp_py.POMCP(max_depth=100, discount_factor=0.95,
-                           num_sims=100, exploration_const=110,
-                           rollout_policy=load_unload_problem.agent.policy_model)
+    pomcp = pomdp_py.POMCP(
+        max_depth=100,
+        discount_factor=0.95,
+        num_sims=100,
+        exploration_const=110,
+        rollout_policy=load_unload_problem.agent.policy_model,
+    )
     test_planner(load_unload_problem, pomcp, nsteps=100)

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/pomdp_py/problems/maze/README.rst b/pomdp_py/problems/maze/README.rst
index 8fc77335..9eae2a29 100644
--- a/pomdp_py/problems/maze/README.rst
+++ b/pomdp_py/problems/maze/README.rst
@@ -9,7 +9,7 @@ Problem originally introduced in `Solving POMDPs by Searching the Space of Finit

 .. figure:: https://i.imgur.com/i1RDsrL.png
    :alt: Figure from the paper
-
+
    Maze POMDP

diff --git a/pomdp_py/problems/maze/__init__.py b/pomdp_py/problems/maze/__init__.py
index d1c45628..fd56c91b 100644
--- a/pomdp_py/problems/maze/__init__.py
+++ b/pomdp_py/problems/maze/__init__.py
@@ -10,7 +10,7 @@

 .. figure:: https://i.imgur.com/i1RDsrL.png
    :alt: Figure from the paper
-
+
    Maze POMDP

diff --git a/pomdp_py/problems/maze/domain/action.py b/pomdp_py/problems/maze/domain/action.py
index b96a2ca4..300a3aad 100644
--- a/pomdp_py/problems/maze/domain/action.py
+++ b/pomdp_py/problems/maze/domain/action.py
@@ -4,5 +4,11 @@

 # Reuses the actions in the multi object search domain
 import pomdp_py
-from pomdp_py.problems.multi_object_search.domain.action\
-    import MotionAction, MoveForward, MoveBackward, MoveLeft, MoveRight, LookAction
+from pomdp_py.problems.multi_object_search.domain.action import (
+    MotionAction,
+    MoveForward,
+    MoveBackward,
+    MoveLeft,
+    MoveRight,
+    LookAction,
+)
diff --git a/pomdp_py/problems/maze/domain/observation.py b/pomdp_py/problems/maze/domain/observation.py
index 3a3fa956..64aff2d0 100644
--- a/pomdp_py/problems/maze/domain/observation.py
+++ b/pomdp_py/problems/maze/domain/observation.py
@@ -1,16 +1,11 @@
-
 import pomdp_py

 # we index the walls around a grid cell in
 # clockwise fashion: top wall (0), right wall (1),
 # bottom wall (2), left wall (3).
-WALL = {
-    0: "top",
-    1: "right",
-    2: "bottom",
-    3: "left"
-}
+WALL = {0: "top", 1: "right", 2: "bottom", 3: "left"}
+

 class Observation(pomdp_py.Observation):
     def __init__(self, walls, orientation):
@@ -30,5 +25,4 @@ def __eq__(self, other):
         if not isinstance(other, Observation):
             return False
         else:
-            return self.walls == other.walls\
-                and self.orientation == other.orientation
+            return self.walls == other.walls and self.orientation == other.orientation
diff --git a/pomdp_py/problems/maze/domain/state.py b/pomdp_py/problems/maze/domain/state.py
index dc1dc351..3568c1a8 100644
--- a/pomdp_py/problems/maze/domain/state.py
+++ b/pomdp_py/problems/maze/domain/state.py
@@ -1,10 +1,13 @@
 """Defines the State for the maze domain, which is the position of the robot and its orientation.
 """
+
 import pomdp_py
 import numpy as np

+
 class State(pomdp_py.State):
     """The state of the problem is just the robot position"""
+
     def __init__(self, positition, orientation):
         """
         Initializes a state in light dark domain.
@@ -19,15 +22,15 @@ def __init__(self, positition, orientation):

     def __hash__(self):
         return hash(self.position, self.orientation)
-
+
     def __eq__(self, other):
         if isinstance(other, State):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "State(%s)" % (str(self.position, self.orientation))
diff --git a/pomdp_py/problems/maze/env/env.py b/pomdp_py/problems/maze/env/env.py
index 93211263..ccc11472 100644
--- a/pomdp_py/problems/maze/env/env.py
+++ b/pomdp_py/problems/maze/env/env.py
@@ -1,5 +1,6 @@
 import pomdp_py

+
 class MazeEnvironment(pomdp_py.Environment):
     def __init__(self, init_state):
         pass
diff --git a/pomdp_py/problems/maze/models/components/map.py b/pomdp_py/problems/maze/models/components/map.py
index c79f289a..79de3fe6 100644
--- a/pomdp_py/problems/maze/models/components/map.py
+++ b/pomdp_py/problems/maze/models/components/map.py
@@ -1,5 +1,6 @@
 import numpy as np

+
 class MazeMap:
     def __init__(self):
         pass
@@ -12,27 +13,26 @@ def from_dict(self, mapdict):
     "start": (5, 6),
     "goal": (5, 3),
     "walls": {
-        "top": [[(0,0), (10,0)],
-                [(2,1), (8,1)],
-                [(4,2), (6,2)]],
-        "bottom": [[(0,6), (10,6)],
-                   [(1,5), (9,5)]
-                   [(3,4), (7,4)],
-                   [(5,3), (5,3)]],
-        "left": [[(0,0), (0,6)],
-                 [(1,1), (5,5)],
-                 [(2,1), (2,4)],
-                 [(3,2), (3,4)],
-                 [(4,2), (4,3)],
-                 [(5,3), (5,3)]],
-        "right": [[(5,3), (5,3)]
-                  [(6,2), (6,3)],
-                  [(7,2), (7,4)],
-                  [(8,1), (8,4)],
-                  [(9,1), (9,5)],
-                  [(10,0), (10,6)]]
-    }
-}
-
-
-
+        "top": [[(0, 0), (10, 0)], [(2, 1), (8, 1)], [(4, 2), (6, 2)]],
+        "bottom": [
+            [(0, 6), (10, 6)],
+            [(1, 5), (9, 5)],
+            [(3, 4), (7, 4)],
+            [(5, 3), (5, 3)],
+        ],
+        "left": [
+            [(0, 0), (0, 6)],
+            [(1, 1), (5, 5)],
+            [(2, 1), (2, 4)],
+            [(3, 2), (3, 4)],
+            [(4, 2), (4, 3)],
+            [(5, 3), (5, 3)],
+        ],
+        "right": [
+            [(5, 3), (5, 3)],
+            [(6, 2), (6, 3)],
+            [(7, 2), (7, 4)],
+            [(8, 1), (8, 4)],
+            [(9, 1), (9, 5)],
+            [(10, 0), (10, 6)],
+        ],
+    },
+}
diff --git a/pomdp_py/problems/multi_object_search/agent/agent.py b/pomdp_py/problems/multi_object_search/agent/agent.py
index bf64c6be..b1525706 100644
--- a/pomdp_py/problems/multi_object_search/agent/agent.py
+++ b/pomdp_py/problems/multi_object_search/agent/agent.py
@@ -10,20 +10,24 @@
 from ..models.reward_model import *
 from ..models.policy_model import *

+
 class MosAgent(pomdp_py.Agent):
     """One agent is one robot."""
-    def __init__(self,
-                 robot_id,
-                 init_robot_state,  # initial robot state (assuming robot state is observable perfectly)
-                 object_ids,  # target object ids
dim, # tuple (w,l) of the width (w) and length (l) of the gridworld search space. - sensor, # Sensor equipped on the robot - sigma=0.01, # parameter for observation model - epsilon=1, # parameter for observation model - belief_rep="histogram", # belief representation, either "histogram" or "particles". - prior={}, # prior belief, as defined in belief.py:initialize_belief - num_particles=100, # used if the belief representation is particles - grid_map=None): # GridMap used to avoid collision with obstacles (None if not provided) + + def __init__( + self, + robot_id, + init_robot_state, # initial robot state (assuming robot state is observable perfectly) + object_ids, # target object ids + dim, # tuple (w,l) of the width (w) and length (l) of the gridworld search space. + sensor, # Sensor equipped on the robot + sigma=0.01, # parameter for observation model + epsilon=1, # parameter for observation model + belief_rep="histogram", # belief representation, either "histogram" or "particles". + prior={}, # prior belief, as defined in belief.py:initialize_belief + num_particles=100, # used if the belief representation is particles + grid_map=None, + ): # GridMap used to avoid collision with obstacles (None if not provided) self.robot_id = robot_id self._object_ids = object_ids self.sensor = sensor @@ -34,29 +38,31 @@ def __init__(self, rth = init_robot_state.pose[2] # initialize belief - init_belief = initialize_belief(dim, - self.robot_id, - self._object_ids, - prior=prior, - representation=belief_rep, - robot_orientations={self.robot_id:rth}, - num_particles=num_particles) - transition_model = MosTransitionModel(dim, - {self.robot_id: self.sensor}, - self._object_ids) - observation_model = MosObservationModel(dim, - self.sensor, - self._object_ids, - sigma=sigma, - epsilon=epsilon) + init_belief = initialize_belief( + dim, + self.robot_id, + self._object_ids, + prior=prior, + representation=belief_rep, + robot_orientations={self.robot_id: rth}, + num_particles=num_particles, + ) + transition_model = MosTransitionModel( + dim, {self.robot_id: self.sensor}, self._object_ids + ) + observation_model = MosObservationModel( + dim, self.sensor, self._object_ids, sigma=sigma, epsilon=epsilon + ) reward_model = GoalRewardModel(self._object_ids, robot_id=self.robot_id) policy_model = PolicyModel(self.robot_id, grid_map=grid_map) - super().__init__(init_belief, policy_model, - transition_model=transition_model, - observation_model=observation_model, - reward_model=reward_model) + super().__init__( + init_belief, + policy_model, + transition_model=transition_model, + observation_model=observation_model, + reward_model=reward_model, + ) def clear_history(self): """Custum function; clear history""" self._history = None - diff --git a/pomdp_py/problems/multi_object_search/agent/belief.py b/pomdp_py/problems/multi_object_search/agent/belief.py index fb79299e..f27161df 100644 --- a/pomdp_py/problems/multi_object_search/agent/belief.py +++ b/pomdp_py/problems/multi_object_search/agent/belief.py @@ -14,9 +14,11 @@ import copy from ..domain.state import * + class MosOOBelief(pomdp_py.OOBelief): """This is needed to make sure the belief is sampling the right type of State for this problem.""" + def __init__(self, robot_id, object_beliefs): """ robot_id (int): The id of the robot that has this belief. 
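For reference while reading the belief hunks below: `initialize_belief` takes `prior` as a mapping {objid -> {(x, y) -> prob}}, with the robot's own entry keyed by robot_id and mapping its full (x, y, th) pose to probability 1.0. A minimal sketch — the ids, poses, and probabilities here are hypothetical, not taken from this diff; any object id omitted from `prior` falls back to a uniform belief per the docstring in the next hunk:

    # Hypothetical prior for initialize_belief: target object 10 is believed
    # to be at (2, 3) or (2, 4); the robot's pose is assumed perfectly known.
    prior = {
        10: {(2, 3): 0.7, (2, 4): 0.3},  # objid -> {(x, y) -> prob}
        -114: {(0, 0, 0): 1.0},          # robot_id -> {(x, y, th): 1.0}
    }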
@@ -33,8 +35,15 @@ def random(self, **kwargs): return MosOOState(pomdp_py.OOBelief.random(self, **kwargs).object_states) -def initialize_belief(dim, robot_id, object_ids, prior={}, - representation="histogram", robot_orientations={}, num_particles=100): +def initialize_belief( + dim, + robot_id, + object_ids, + prior={}, + representation="histogram", + robot_orientations={}, + num_particles=100, +): """ Returns a GenerativeDistribution that is the belief representation for the multi-object search problem. @@ -46,7 +55,7 @@ def initialize_belief(dim, robot_id, object_ids, prior={}, over; They are `assumed` to be the target objects, not obstacles, because the robot doesn't really care about obstacle locations and modeling them just adds computation cost. - prior (dict): A mapping {(objid|robot_id) -> {(x,y) -> [0,1]}}. If used, then + prior (dict): A mapping {(objid|robot_id) -> {(x,y) -> [0,1]}}. If used, then all locations not included in the prior will be treated to have 0 probability. If unspecified for an object, then the belief over that object is assumed to be a uniform distribution. @@ -58,14 +67,17 @@ def initialize_belief(dim, robot_id, object_ids, prior={}, GenerativeDistribution: the initial belief representation. """ if representation == "histogram": - return _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orientations) + return _initialize_histogram_belief( + dim, robot_id, object_ids, prior, robot_orientations + ) elif representation == "particles": - return _initialize_particles_belief(dim, robot_id, object_ids, - robot_orientations, num_particles=num_particles) + return _initialize_particles_belief( + dim, robot_id, object_ids, robot_orientations, num_particles=num_particles + ) else: raise ValueError("Unsupported belief representation %s" % representation) - + def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orientations): """ Returns the belief distribution represented as a histogram @@ -85,7 +97,7 @@ def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orienta # no prior knowledge. So uniform. for x in range(width): for y in range(length): - state = ObjectState(objid, "target", (x,y)) + state = ObjectState(objid, "target", (x, y)) hist[state] = 1.0 total_prob += hist[state] @@ -100,14 +112,16 @@ def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orienta # Its pose must have been provided in the `prior`. assert robot_id in prior, "Missing initial robot pose in prior." init_robot_pose = list(prior[robot_id].keys())[0] - oo_hists[robot_id] =\ - pomdp_py.Histogram({RobotState(robot_id, init_robot_pose, (), None): 1.0}) - + oo_hists[robot_id] = pomdp_py.Histogram( + {RobotState(robot_id, init_robot_pose, (), None): 1.0} + ) + return MosOOBelief(robot_id, oo_hists) -def _initialize_particles_belief(dim, robot_id, object_ids, prior, - robot_orientations, num_particles=100): +def _initialize_particles_belief( + dim, robot_id, object_ids, prior, robot_orientations, num_particles=100 +): """This returns a single set of particles that represent the distribution over a joint state space of all objects. @@ -123,11 +137,13 @@ def _initialize_particles_belief(dim, robot_id, object_ids, prior, # Its pose must have been provided in the `prior`. assert robot_id in prior, "Missing initial robot pose in prior." 
init_robot_pose = list(prior[robot_id].keys())[0] - + oo_particles = {} # objid -> Particageles width, length = dim for objid in object_ids: - particles = [RobotState(robot_id, init_robot_pose, (), None)] # list of states; Starting the observable robot state. + particles = [ + RobotState(robot_id, init_robot_pose, (), None) + ] # list of states; Starting the observable robot state. if objid in prior: # prior knowledge provided. Just use the prior knowledge prior_sum = sum(prior[objid][pose] for pose in prior[objid]) @@ -141,12 +157,12 @@ def _initialize_particles_belief(dim, robot_id, object_ids, prior, for _ in range(num_particles): x = random.randrange(0, width) y = random.randrange(0, length) - state = ObjectState(objid, "target", (x,y)) + state = ObjectState(objid, "target", (x, y)) particles.append(state) particles_belief = pomdp_py.Particles(particles) oo_particles[objid] = particles_belief - + # Return Particles distribution which contains particles # that represent joint object states particles = [] diff --git a/pomdp_py/problems/multi_object_search/domain/action.py b/pomdp_py/problems/multi_object_search/domain/action.py index 772f55b5..bbb41d16 100644 --- a/pomdp_py/problems/multi_object_search/domain/action.py +++ b/pomdp_py/problems/multi_object_search/domain/action.py @@ -1,7 +1,7 @@ """ Defines the Action for the 2D Multi-Object Search domain; -Action space: +Action space: Motion :math:`\cup` Look :math:`\cup` Find @@ -14,42 +14,52 @@ It is possible to force "Look" after every N/S/E/W action; then the Look action could be dropped. This is optional behavior. """ + import pomdp_py import math + ###### Actions ###### class Action(pomdp_py.Action): """Mos action; Simple named action.""" + def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, Action): return self.name == other.name elif type(other) == str: return self.name == other + def __str__(self): return self.name + def __repr__(self): return "Action(%s)" % self.name -MOTION_SCHEME="xy" # can be either xy or vw -STEP_SIZE=1 + +MOTION_SCHEME = "xy" # can be either xy or vw +STEP_SIZE = 1 + + class MotionAction(Action): # scheme 1 (vx,vy,th) SCHEME_XYTH = "xyth" EAST = (STEP_SIZE, 0, 0) # x is horizontal; x+ is right. y is vertical; y+ is down. WEST = (-STEP_SIZE, 0, math.pi) - NORTH = (0, -STEP_SIZE, 3*math.pi/2) - SOUTH = (0, STEP_SIZE, math.pi/2) - + NORTH = (0, -STEP_SIZE, 3 * math.pi / 2) + SOUTH = (0, STEP_SIZE, math.pi / 2) + # scheme 2 (vt, vw) translational, rotational velocities. 
- SCHEME_VW = "vw" + SCHEME_VW = "vw" FORWARD = (STEP_SIZE, 0) BACKWARD = (-STEP_SIZE, 0) - LEFT = (0, -math.pi/4) # left 45 deg - RIGHT = (0, math.pi/4) # right 45 deg + LEFT = (0, -math.pi / 4) # left 45 deg + RIGHT = (0, math.pi / 4) # right 45 deg # scheme 3 (vx,vy) SCHEME_XY = "xy" @@ -60,9 +70,7 @@ class MotionAction(Action): SCHEMES = {"xyth", "xy", "vw"} - def __init__(self, motion, - scheme=MOTION_SCHEME, distance_cost=1, - motion_name=None): + def __init__(self, motion, scheme=MOTION_SCHEME, distance_cost=1, motion_name=None): """ motion (tuple): a tuple of floats that describes the motion; scheme (str): description of the motion scheme; Either @@ -72,51 +80,90 @@ def __init__(self, motion, raise ValueError("Invalid motion scheme %s" % scheme) if scheme == MotionAction.SCHEME_XYTH: - if motion not in {MotionAction.EAST, MotionAction.WEST, - MotionAction.NORTH, MotionAction.SOUTH}: + if motion not in { + MotionAction.EAST, + MotionAction.WEST, + MotionAction.NORTH, + MotionAction.SOUTH, + }: raise ValueError("Invalid move motion %s" % str(motion)) elif scheme == MotionAction.SCHEME_VW: - if motion not in {MotionAction.FORWARD, MotionAction.BACKWARD, - MotionAction.LEFT, MotionAction.RIGHT}: + if motion not in { + MotionAction.FORWARD, + MotionAction.BACKWARD, + MotionAction.LEFT, + MotionAction.RIGHT, + }: raise ValueError("Invalid move motion %s" % str(motion)) elif scheme == MotionAction.SCHEME_XY: - if motion not in {MotionAction.EAST2D, MotionAction.WEST2D, - MotionAction.NORTH2D, MotionAction.SOUTH2D}: + if motion not in { + MotionAction.EAST2D, + MotionAction.WEST2D, + MotionAction.NORTH2D, + MotionAction.SOUTH2D, + }: raise ValueError("Invalid move motion %s" % str(motion)) - + self.motion = motion self.scheme = scheme self.distance_cost = distance_cost if motion_name is None: motion_name = str(motion) super().__init__("move-%s-%s" % (scheme, motion_name)) - -# Define some constant actions -MoveEast = MotionAction(MotionAction.EAST, scheme=MotionAction.SCHEME_XYTH, motion_name="East") -MoveWest = MotionAction(MotionAction.WEST, scheme=MotionAction.SCHEME_XYTH, motion_name="West") -MoveNorth = MotionAction(MotionAction.NORTH, scheme=MotionAction.SCHEME_XYTH, motion_name="North") -MoveSouth = MotionAction(MotionAction.SOUTH, scheme=MotionAction.SCHEME_XYTH, motion_name="South") -MoveForward = MotionAction(MotionAction.FORWARD, scheme=MotionAction.SCHEME_VW, motion_name="Forward") -MoveBackward = MotionAction(MotionAction.BACKWARD, scheme=MotionAction.SCHEME_VW, motion_name="Backward") -MoveLeft = MotionAction(MotionAction.LEFT, scheme=MotionAction.SCHEME_VW, motion_name="TurnLeft") -MoveRight = MotionAction(MotionAction.RIGHT, scheme=MotionAction.SCHEME_VW, motion_name="TurnRight") -MoveEast2D = MotionAction(MotionAction.EAST2D, scheme=MotionAction.SCHEME_XY, motion_name="East2D") -MoveWest2D = MotionAction(MotionAction.WEST2D, scheme=MotionAction.SCHEME_XY, motion_name="West2D") -MoveNorth2D = MotionAction(MotionAction.NORTH2D, scheme=MotionAction.SCHEME_XY, motion_name="North2D") -MoveSouth2D = MotionAction(MotionAction.SOUTH2D, scheme=MotionAction.SCHEME_XY, motion_name="South2D") +# Define some constant actions +MoveEast = MotionAction( + MotionAction.EAST, scheme=MotionAction.SCHEME_XYTH, motion_name="East" +) +MoveWest = MotionAction( + MotionAction.WEST, scheme=MotionAction.SCHEME_XYTH, motion_name="West" +) +MoveNorth = MotionAction( + MotionAction.NORTH, scheme=MotionAction.SCHEME_XYTH, motion_name="North" +) +MoveSouth = MotionAction( + MotionAction.SOUTH, 
scheme=MotionAction.SCHEME_XYTH, motion_name="South" +) + +MoveForward = MotionAction( + MotionAction.FORWARD, scheme=MotionAction.SCHEME_VW, motion_name="Forward" +) +MoveBackward = MotionAction( + MotionAction.BACKWARD, scheme=MotionAction.SCHEME_VW, motion_name="Backward" +) +MoveLeft = MotionAction( + MotionAction.LEFT, scheme=MotionAction.SCHEME_VW, motion_name="TurnLeft" +) +MoveRight = MotionAction( + MotionAction.RIGHT, scheme=MotionAction.SCHEME_VW, motion_name="TurnRight" +) + +MoveEast2D = MotionAction( + MotionAction.EAST2D, scheme=MotionAction.SCHEME_XY, motion_name="East2D" +) +MoveWest2D = MotionAction( + MotionAction.WEST2D, scheme=MotionAction.SCHEME_XY, motion_name="West2D" +) +MoveNorth2D = MotionAction( + MotionAction.NORTH2D, scheme=MotionAction.SCHEME_XY, motion_name="North2D" +) +MoveSouth2D = MotionAction( + MotionAction.SOUTH2D, scheme=MotionAction.SCHEME_XY, motion_name="South2D" +) class LookAction(Action): # For simplicity, this LookAction is not parameterized by direction def __init__(self): super().__init__("look") - + + class FindAction(Action): def __init__(self): super().__init__("find") + Look = LookAction() Find = FindAction() diff --git a/pomdp_py/problems/multi_object_search/domain/observation.py b/pomdp_py/problems/multi_object_search/domain/observation.py index 1be5da68..8ff256e3 100644 --- a/pomdp_py/problems/multi_object_search/domain/observation.py +++ b/pomdp_py/problems/multi_object_search/domain/observation.py @@ -12,32 +12,37 @@ it could be something else. But the resulting observation should be a map from object id to observed pose or NULL (not observed). """ + import pomdp_py + ###### Observation ###### class ObjectObservation(pomdp_py.Observation): """The xy pose of the object is observed; or NULL if not observed""" + NULL = None + def __init__(self, objid, pose): self.objid = objid - if type(pose) == tuple and len(pose) == 2\ - or pose == ObjectObservation.NULL: + if type(pose) == tuple and len(pose) == 2 or pose == ObjectObservation.NULL: self.pose = pose else: - raise ValueError("Invalid observation %s for object" - % (str(pose), objid)) + raise ValueError("Invalid observation %s for object %s" % (str(pose), str(objid))) + def __hash__(self): return hash((self.objid, self.pose)) + def __eq__(self, other): if not isinstance(other, ObjectObservation): return False else: - return self.objid == other.objid\ - and self.pose == other.pose + return self.objid == other.objid and self.pose == other.pose + class MosOOObservation(pomdp_py.OOObservation): """Observation for Mos that can be factored by objects; thus this is an OOObservation.""" + def __init__(self, objposes): """ objposes (dict): map from objid to 2d pose or NULL (not ObjectObservation!).
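To make the `objposes` convention concrete, a minimal sketch with made-up object ids — object 10 detected at grid cell (2, 3), object 12 not observed:

    z = MosOOObservation({10: (2, 3), 12: ObjectObservation.NULL})

`z.for_obj(12)` then wraps the NULL entry back into an ObjectObservation; see the for_obj hunk that follows.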
@@ -50,10 +55,10 @@ def for_obj(self, objid): return ObjectObservation(objid, self.objposes[objid]) else: return ObjectObservation(objid, ObjectObservation.NULL) - + def __hash__(self): return self._hashcode - + def __eq__(self, other): if not isinstance(other, MosOOObservation): return False @@ -68,15 +73,21 @@ def __repr__(self): def factor(self, next_state, *params, **kwargs): """Factor this OO-observation by objects""" - return {objid: ObjectObservation(objid, self.objposes[objid]) - for objid in next_state.object_states - if objid != next_state.robot_id} - + return { + objid: ObjectObservation(objid, self.objposes[objid]) + for objid in next_state.object_states + if objid != next_state.robot_id + } + @classmethod def merge(cls, object_observations, next_state, *params, **kwargs): """Merge `object_observations` into a single OOObservation object; - + object_observation (dict): Maps from objid to ObjectObservation""" - return MosOOObservation({objid: object_observations[objid].pose - for objid in object_observations - if objid != next_state.object_states[objid].objclass != "robot"}) + return MosOOObservation( + { + objid: object_observations[objid].pose + for objid in object_observations + if next_state.object_states[objid].objclass != "robot" + } + ) diff --git a/pomdp_py/problems/multi_object_search/domain/state.py b/pomdp_py/problems/multi_object_search/domain/state.py index 08a03f3a..991544e6 100644 --- a/pomdp_py/problems/multi_object_search/domain/state.py +++ b/pomdp_py/problems/multi_object_search/domain/state.py @@ -6,7 +6,7 @@ Description: Multi-Object Search in a 2D grid world. -State space: +State space: :math:`S_1 \\times S_2 \\times ... S_n \\times S_r` where :math:`S_i (1\leq i\leq n)` is the object state, with attribute @@ -17,57 +17,84 @@ import pomdp_py import math + ###### States ###### class ObjectState(pomdp_py.ObjectState): def __init__(self, objid, objclass, pose): if objclass != "obstacle" and objclass != "target": - raise ValueError("Only allow object class to be"\ - "either 'target' or 'obstacle'." - "Got %s" % objclass) - super().__init__(objclass, {"pose":pose, "id":objid}) + raise ValueError( + "Only allow object class to be either 'target' or 'obstacle'. Got %s" + % objclass + ) + super().__init__(objclass, {"pose": pose, "id": objid}) + def __str__(self): - return 'ObjectState(%s,%s)' % (str(self.objclass), str(self.pose)) + return "ObjectState(%s,%s)" % (str(self.objclass), str(self.pose)) + @property def pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def objid(self): - return self.attributes['id'] + return self.attributes["id"] + class RobotState(pomdp_py.ObjectState): def __init__(self, robot_id, pose, objects_found, camera_direction): """Note: camera_direction is None unless the robot is looking at a direction, in which case camera_direction is the string e.g.
look+x, or 'look'""" - super().__init__("robot", {"id":robot_id, - "pose":pose, # x,y,th - "objects_found": objects_found, - "camera_direction": camera_direction}) + super().__init__( + "robot", + { + "id": robot_id, + "pose": pose, # x,y,th + "objects_found": objects_found, + "camera_direction": camera_direction, + }, + ) + def __str__(self): - return 'RobotState(%s,%s|%s)' % (str(self.objclass), str(self.pose), str(self.objects_found)) + return "RobotState(%s,%s|%s)" % ( + str(self.objclass), + str(self.pose), + str(self.objects_found), + ) + def __repr__(self): return str(self) + @property def pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def robot_pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def objects_found(self): - return self.attributes['objects_found'] + return self.attributes["objects_found"] + class MosOOState(pomdp_py.OOState): def __init__(self, object_states): super().__init__(object_states) + def object_pose(self, objid): return self.object_states[objid]["pose"] + def pose(self, objid): return self.object_pose(objid) + @property def object_poses(self): - return {objid:self.object_states[objid]['pose'] - for objid in self.object_states} + return { + objid: self.object_states[objid]["pose"] for objid in self.object_states + } + def __str__(self): - return 'MosOOState%s' % (str(self.object_states)) + return "MosOOState%s" % (str(self.object_states)) + def __repr__(self): return str(self) diff --git a/pomdp_py/problems/multi_object_search/env/env.py b/pomdp_py/problems/multi_object_search/env/env.py index e04ab387..da4fce30 100644 --- a/pomdp_py/problems/multi_object_search/env/env.py +++ b/pomdp_py/problems/multi_object_search/env/env.py @@ -7,8 +7,10 @@ from pomdp_py.problems.multi_object_search.models.components.sensor import * from pomdp_py.problems.multi_object_search.domain.state import * + class MosEnvironment(pomdp_py.Environment): """""" + def __init__(self, dim, init_state, sensors, obstacles=set({})): """ Args: @@ -21,18 +23,17 @@ def __init__(self, dim, init_state, sensors, obstacles=set({})): self.width, self.length = dim self.sensors = sensors self.obstacles = obstacles - transition_model = MosTransitionModel(dim, - sensors, - set(init_state.object_states.keys())) + transition_model = MosTransitionModel( + dim, sensors, set(init_state.object_states.keys()) + ) # Target objects, a set of ids, are not robot nor obstacles - self.target_objects = \ - {objid - for objid in set(init_state.object_states.keys()) - self.obstacles - if not isinstance(init_state.object_states[objid], RobotState)} + self.target_objects = { + objid + for objid in set(init_state.object_states.keys()) - self.obstacles + if not isinstance(init_state.object_states[objid], RobotState) + } reward_model = GoalRewardModel(self.target_objects) - super().__init__(init_state, - transition_model, - reward_model) + super().__init__(init_state, transition_model, reward_model) @property def robot_ids(self): @@ -56,20 +57,25 @@ def state_transition(self, action, execute=True, robot_id=None): is False. 
""" - assert robot_id is not None, "state transition should happen for a specific robot" + assert ( + robot_id is not None + ), "state transition should happen for a specific robot" next_state = copy.deepcopy(self.state) - next_state.object_states[robot_id] =\ - self.transition_model[robot_id].sample(self.state, action) + next_state.object_states[robot_id] = self.transition_model[robot_id].sample( + self.state, action + ) - reward = self.reward_model.sample(self.state, action, next_state, - robot_id=robot_id) + reward = self.reward_model.sample( + self.state, action, next_state, robot_id=robot_id + ) if execute: self.apply_transition(next_state) return reward else: return next_state, reward + #### Interpret string as an initial world state #### def interpret(worldstr): """ @@ -116,11 +122,10 @@ def interpret(worldstr): if mode == "sensor": sensorlines.append(line) - lines = [line for line in worldlines - if len(line) > 0] + lines = [line for line in worldlines if len(line) > 0] w, l = len(worldlines[0]), len(worldlines) - objects = {} # objid -> object_state(pose) + objects = {} # objid -> object_state(pose) obstacles = set({}) # objid robots = {} # robot_id -> robot_state(pose) sensors = {} # robot_id -> Sensor @@ -128,25 +133,26 @@ def interpret(worldstr): # Parse world for y, line in enumerate(worldlines): if len(line) != w: - raise ValueError("World size inconsistent."\ - "Expected width: %d; Actual Width: %d" - % (w, len(line))) + raise ValueError( + "World size inconsistent.Expected width: %d; Actual Width: %d" + % (w, len(line)) + ) for x, c in enumerate(line): if c == "x": # obstacle objid = 1000 + len(obstacles) # obstacle id - objects[objid] = ObjectState(objid, "obstacle", (x,y)) + objects[objid] = ObjectState(objid, "obstacle", (x, y)) obstacles.add(objid) elif c.isupper(): # target object objid = len(objects) - objects[objid] = ObjectState(objid, "target", (x,y)) + objects[objid] = ObjectState(objid, "target", (x, y)) elif c.islower(): # robot robot_id = interpret_robot_id(c) - robots[robot_id] = RobotState(robot_id, (x,y,0), (), None) + robots[robot_id] = RobotState(robot_id, (x, y, 0), (), None) else: assert c == ".", "Unrecognized character %s in worldstr" % c @@ -158,10 +164,14 @@ def interpret(worldstr): # Parse sensors for line in sensorlines: if "," in line: - raise ValueError("Wrong Fromat. SHould not have ','. Separate tokens with space.") + raise ValueError( + "Wrong Fromat. SHould not have ','. Separate tokens with space." + ) robot_name = line.split(":")[0].strip() robot_id = interpret_robot_id(robot_name) - assert robot_id in robots, "Sensor specified for unknown robot %s" % (robot_name) + assert robot_id in robots, "Sensor specified for unknown robot %s" % ( + robot_name + ) sensor_setting = line.split(":")[1].strip() sensor_type = sensor_setting.split(" ")[0].strip() @@ -179,7 +189,8 @@ def interpret(worldstr): raise ValueError("Unknown sensor type %s" % sensor_type) sensors[robot_id] = sensor - return (w,l), robots, objects, obstacles, sensors + return (w, l), robots, objects, obstacles, sensors + def interpret_robot_id(robot_name): return -ord(robot_name) @@ -202,6 +213,7 @@ def equip_sensors(worldmap, sensors): worldmap += "%s: %s\n" % (robot_char, sensors[robot_char]) return worldmap + def make_laser_sensor(fov, dist_range, angle_increment, occlusion): """ Returns string representation of the laser scanner configuration. 
@@ -222,6 +234,7 @@ def make_laser_sensor(fov, dist_range, angle_increment, occlusion): occstr = "occlusion_enabled=%s" % str(occlusion) return "laser %s %s %s %s" % (fovstr, rangestr, angicstr, occstr) + def make_proximity_sensor(radius, occlusion): """ Returns string representation of the proximity sensor configuration. diff --git a/pomdp_py/problems/multi_object_search/env/visual.py b/pomdp_py/problems/multi_object_search/env/visual.py index 40a3d6e3..6983bf83 100644 --- a/pomdp_py/problems/multi_object_search/env/visual.py +++ b/pomdp_py/problems/multi_object_search/env/visual.py @@ -18,25 +18,25 @@ from pomdp_py.problems.multi_object_search.domain.state import * from pomdp_py.problems.multi_object_search.example_worlds import * + # Deterministic way to get object color def object_color(objid, count): color = [107, 107, 107] if count % 3 == 0: - color[0] += 100 + (3 * (objid*5 % 11)) + color[0] += 100 + (3 * (objid * 5 % 11)) color[0] = max(12, min(222, color[0])) elif count % 3 == 1: - color[1] += 100 + (3 * (objid*5 % 11)) + color[1] += 100 + (3 * (objid * 5 % 11)) color[1] = max(12, min(222, color[1])) else: - color[2] += 100 + (3 * (objid*5 % 11)) + color[2] += 100 + (3 * (objid * 5 % 11)) color[2] = max(12, min(222, color[2])) return tuple(color) + #### Visualization through pygame #### class MosViz: - - def __init__(self, env, - res=30, fps=30, controllable=False): + def __init__(self, env, res=30, fps=30, controllable=False): self._env = env self._res = res @@ -60,8 +60,7 @@ def __init__(self, env, def _make_gridworld_image(self, r): # Preparing 2d array w, l = self._env.width, self._env.length - arr2d = np.full((self._env.width, - self._env.length), 0) # free grids + arr2d = np.full((self._env.width, self._env.length), 0) # free grids state = self._env.state for objid in state.object_states: pose = state.object_states[objid]["pose"] @@ -73,20 +72,24 @@ def _make_gridworld_image(self, r): arr2d[pose[0], pose[1]] = 2 # target # Creating image - img = np.full((w*r,l*r,3), 255, dtype=np.int32) + img = np.full((w * r, l * r, 3), 255, dtype=np.int32) for x in range(w): for y in range(l): - if arr2d[x,y] == 0: # free - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (255, 255, 255), -1) - elif arr2d[x,y] == 1: # obstacle - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (40, 31, 3), -1) - elif arr2d[x,y] == 2: # target - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (255, 165, 0), -1) - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (0, 0, 0), 1, 8) + if arr2d[x, y] == 0: # free + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (255, 255, 255), -1 + ) + elif arr2d[x, y] == 1: # obstacle + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (40, 31, 3), -1 + ) + elif arr2d[x, y] == 2: # target + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (255, 165, 0), -1 + ) + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (0, 0, 0), 1, 8 + ) return img @property @@ -117,23 +120,26 @@ def update(self, robot_id, action, observation, viz_observation, belief): self._last_belief[robot_id] = belief @staticmethod - def draw_robot(img, x, y, th, size, color=(255,12,12)): + def draw_robot(img, x, y, th, size, color=(255, 12, 12)): radius = int(round(size / 2)) - cv2.circle(img, (y+radius, x+radius), radius, color, thickness=2) + cv2.circle(img, (y + radius, x + radius), radius, color, thickness=2) - endpoint = (y+radius + int(round(radius*math.sin(th))), - x+radius + int(round(radius*math.cos(th)))) - cv2.line(img, (y+radius,x+radius), endpoint, 
color, 2) + endpoint = ( + y + radius + int(round(radius * math.sin(th))), + x + radius + int(round(radius * math.cos(th))), + ) + cv2.line(img, (y + radius, x + radius), endpoint, color, 2) @staticmethod - def draw_observation(img, z, rx, ry, rth, r, size, color=(12,12,255)): + def draw_observation(img, z, rx, ry, rth, r, size, color=(12, 12, 255)): assert type(z) == MosOOObservation, "%s != MosOOObservation" % (str(type(z))) radius = int(round(r / 2)) for objid in z.objposes: if z.for_obj(objid).pose != ObjectObservation.NULL: lx, ly = z.for_obj(objid).pose - cv2.circle(img, (ly*r+radius, - lx*r+radius), size, color, thickness=-1) + cv2.circle( + img, (ly * r + radius, lx * r + radius), size, color, thickness=-1 + ) @staticmethod def draw_belief(img, belief, r, size, target_colors): @@ -151,20 +157,25 @@ def draw_belief(img, belief, r, size, target_colors): last_val = -1 count = 0 for state in reversed(sorted(hist, key=hist.get)): - if state.objclass == 'target': + if state.objclass == "target": if last_val != -1: - color = util.lighter(color, 1-hist[state]/last_val) + color = util.lighter(color, 1 - hist[state] / last_val) if np.mean(np.array(color) / np.array([255, 255, 255])) < 0.99: - tx, ty = state['pose'] - if (tx,ty) not in circle_drawn: - circle_drawn[(tx,ty)] = 0 - circle_drawn[(tx,ty)] += 1 - - cv2.circle(img, (ty*r+radius, - tx*r+radius), size//circle_drawn[(tx,ty)], color, thickness=-1) + tx, ty = state["pose"] + if (tx, ty) not in circle_drawn: + circle_drawn[(tx, ty)] = 0 + circle_drawn[(tx, ty)] += 1 + + cv2.circle( + img, + (ty * r + radius, tx * r + radius), + size // circle_drawn[(tx, ty)], + color, + thickness=-1, + ) last_val = hist[state] - count +=1 + count += 1 if last_val <= 0: break @@ -173,14 +184,14 @@ def on_init(self): """pygame init""" pygame.init() # calls pygame.font.init() # init main screen and background - self._display_surf = pygame.display.set_mode((self.img_width, - self.img_height), - pygame.HWSURFACE) + self._display_surf = pygame.display.set_mode( + (self.img_width, self.img_height), pygame.HWSURFACE + ) self._background = pygame.Surface(self._display_surf.get_size()).convert() self._clock = pygame.time.Clock() # Font - self._myfont = pygame.font.SysFont('Comic Sans MS', 30) + self._myfont = pygame.font.SysFont("Comic Sans MS", 30) self._running = True def on_event(self, event): @@ -221,15 +232,18 @@ def on_event(self, event): if self._controllable: if isinstance(action, MotionAction): - reward = self._env.state_transition(action, execute=True, robot_id=robot_id) + reward = self._env.state_transition( + action, execute=True, robot_id=robot_id + ) z = None elif isinstance(action, LookAction) or isinstance(action, FindAction): robot_pose = self._env.state.pose(robot_id) - z = self._env.sensors[robot_id].observe(robot_pose, - self._env.state) + z = self._env.sensors[robot_id].observe(robot_pose, self._env.state) self._last_observation[robot_id] = z self._last_viz_observation[robot_id] = z - reward = self._env.state_transition(action, execute=True, robot_id=robot_id) + reward = self._env.state_transition( + action, execute=True, robot_id=robot_id + ) print("robot state: %s" % str(self._env.state.object_states[robot_id])) print(" action: %s" % str(action.name)) print(" observation: %s" % str(z)) @@ -248,10 +262,18 @@ def on_render(self): fps_text = "FPS: {0:.2f}".format(self._clock.get_fps()) last_action = self._last_action.get(robot_id, None) last_action_str = "no_action" if last_action is None else str(last_action) - pygame.display.set_caption("%s | 
Robot%d(%.2f,%.2f,%.2f) | %s | %s" % - (last_action_str, robot_id, rx, ry, rth*180/math.pi, - str(self._env.state.object_states[robot_id]["objects_found"]), - fps_text)) + pygame.display.set_caption( + "%s | Robot%d(%.2f,%.2f,%.2f) | %s | %s" + % ( + last_action_str, + robot_id, + rx, + ry, + rth * 180 / math.pi, + str(self._env.state.object_states[robot_id]["objects_found"]), + fps_text, + ) + ) pygame.display.flip() def on_cleanup(self): @@ -261,7 +283,7 @@ def on_execute(self): if self.on_init() == False: self._running = False - while( self._running ): + while self._running: for event in pygame.event.get(): self.on_event(event) self.on_loop() @@ -278,17 +300,29 @@ def render_env(self, display_surf): last_viz_observation = self._last_viz_observation.get(robot_id, None) last_belief = self._last_belief.get(robot_id, None) if last_belief is not None: - MosViz.draw_belief(img, last_belief, r, r//3, self._target_colors) + MosViz.draw_belief(img, last_belief, r, r // 3, self._target_colors) if last_viz_observation is not None: - MosViz.draw_observation(img, last_viz_observation, - rx, ry, rth, r, r//4, color=(200, 200, 12)) + MosViz.draw_observation( + img, + last_viz_observation, + rx, + ry, + rth, + r, + r // 4, + color=(200, 200, 12), + ) if last_observation is not None: - MosViz.draw_observation(img, last_observation, - rx, ry, rth, r, r//8, color=(20, 20, 180)) + MosViz.draw_observation( + img, last_observation, rx, ry, rth, r, r // 8, color=(20, 20, 180) + ) - MosViz.draw_robot(img, rx*r, ry*r, rth, r, color=(12, 255*(0.8*(i+1)), 12)) + MosViz.draw_robot( + img, rx * r, ry * r, rth, r, color=(12, 255 * (0.8 * (i + 1)), 12) + ) pygame.surfarray.blit_array(display_surf, img) + def unittest(): # If you don't want occlusion, use this: laserstr = make_laser_sensor(90, (1, 8), 0.5, False) @@ -307,11 +341,10 @@ def unittest(): dim, robots, objects, obstacles, sensors = interpret(worldstr) init_state = MosOOState({**objects, **robots}) - env = MosEnvironment(dim, - init_state, sensors, - obstacles=obstacles) + env = MosEnvironment(dim, init_state, sensors, obstacles=obstacles) viz = MosViz(env, controllable=True) viz.on_execute() -if __name__ == '__main__': + +if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/multi_object_search/example_worlds.py b/pomdp_py/problems/multi_object_search/example_worlds.py index 6bb58a1b..7fd02bd4 100644 --- a/pomdp_py/problems/multi_object_search/example_worlds.py +++ b/pomdp_py/problems/multi_object_search/example_worlds.py @@ -1,4 +1,5 @@ """This file has some examples of world string.""" + import random ############# Example Worlds ########### @@ -6,14 +7,16 @@ # the format world0 = ( -""" + """ rx... .x.xT ..... -""", "r") +""", + "r", +) world1 = ( -""" + """ rx.T... .x..... ...xx.. @@ -21,11 +24,13 @@ .xxx.T. .xxx... ....... -""", "r") +""", + "r", +) # Used to test the shape of the sensor world2 = ( -""" + """ ................. ................. ..xxxxxxxxxxxxx.. @@ -47,11 +52,13 @@ ..xxxxxxxxxxxxx.. ................. ................. -""", "r") +""", + "r", +) # Used to test sensor occlusion world3 = ( -""" + """ ................. ................. ..xxxxxxxxxxxxx.. @@ -73,11 +80,13 @@ ..xxxxxxxxxxxxx.. ................. ................. -""", "r") +""", + "r", +) + -def random_world(width, length, num_obj, num_obstacles, - robot_char="r"): - worldstr = [[ "." for i in range(width)] for j in range(length)] +def random_world(width, length, num_obj, num_obstacles, robot_char="r"): + worldstr = [["." 
for i in range(width)] for j in range(length)] # First place obstacles num_obstacles_placed = 0 while num_obstacles_placed < num_obstacles: @@ -86,7 +95,7 @@ def random_world(width, length, num_obj, num_obstacles, if worldstr[y][x] == ".": worldstr[y][x] = "x" num_obstacles_placed += 1 - + num_obj_placed = 0 while num_obj_placed < num_obj: x = random.randrange(0, width) diff --git a/pomdp_py/problems/multi_object_search/models/components/grid_map.py b/pomdp_py/problems/multi_object_search/models/components/grid_map.py index 38093b48..847a2d15 100644 --- a/pomdp_py/problems/multi_object_search/models/components/grid_map.py +++ b/pomdp_py/problems/multi_object_search/models/components/grid_map.py @@ -1,14 +1,18 @@ """Optional grid map to assist collision avoidance during planning.""" -from pomdp_py.problems.multi_object_search.models.transition_model import RobotTransitionModel +from pomdp_py.problems.multi_object_search.models.transition_model import ( + RobotTransitionModel, +) from pomdp_py.problems.multi_object_search.domain.action import * from pomdp_py.problems.multi_object_search.domain.state import * + class GridMap: """This map assists the agent to avoid planning invalid actions that will run into obstacles. Used if we assume the agent has a map. This map does not contain information about the object locations.""" + def __init__(self, width, length, obstacles): """ Args: @@ -28,8 +32,7 @@ def __init__(self, width, length, obstacles): for objid in self._obstacles } # set of obstacle poses - self.obstacle_poses = set({self._obstacles[objid] - for objid in self._obstacles}) + self.obstacle_poses = set({self._obstacles[objid] for objid in self._obstacles}) def valid_motions(self, robot_id, robot_pose, all_motion_actions): """ @@ -39,16 +42,18 @@ def valid_motions(self, robot_id, robot_pose, all_motion_actions): the assumption that the robot dynamics is deterministic. """ state = MosOOState(self._obstacle_states) - state.set_object_state(robot_id, - RobotState(robot_id, robot_pose, None, None)) + state.set_object_state(robot_id, RobotState(robot_id, robot_pose, None, None)) valid = set({}) for motion_action in all_motion_actions: if not isinstance(motion_action, MotionAction): - raise ValueError("This (%s) is not a motion action" % str(motion_action)) + raise ValueError( + "This (%s) is not a motion action" % str(motion_action) + ) - next_pose = RobotTransitionModel.if_move_by(robot_id, state, - motion_action, (self.width, self.length)) + next_pose = RobotTransitionModel.if_move_by( + robot_id, state, motion_action, (self.width, self.length) + ) if next_pose != robot_pose: # robot moved --> valid motion valid.add(motion_action) diff --git a/pomdp_py/problems/multi_object_search/models/components/sensor.py b/pomdp_py/problems/multi_object_search/models/components/sensor.py index 033956f2..91241837 100644 --- a/pomdp_py/problems/multi_object_search/models/components/sensor.py +++ b/pomdp_py/problems/multi_object_search/models/components/sensor.py @@ -13,21 +13,26 @@ # not receive observation for some regions in the field of view # making it a more challenging situation to deal with. + # Utility functions def euclidean_dist(p1, p2): - return math.sqrt(sum([(a - b)** 2 for a, b in zip(p1, p2)])) + return math.sqrt(sum([(a - b) ** 2 for a, b in zip(p1, p2)])) + def to_rad(deg): return deg * math.pi / 180.0 + def in_range(val, rang): # Returns True if val is in range (a,b); Inclusive. 
return val >= rang[0] and val <= rang[1] + #### Sensors #### class Sensor: LASER = "laser" PROXIMITY = "proximity" + def observe(self, robot_pose, env_state): """ Returns an Observation with this sensor model. @@ -48,13 +53,19 @@ def robot_id(self): # id of the robot equipped with this sensor return self._robot_id + class Laser2DSensor: """Fan shaped 2D laser sensor""" - def __init__(self, robot_id, - fov=90, min_range=1, max_range=5, - angle_increment=5, - occlusion_enabled=False): + def __init__( + self, + robot_id, + fov=90, + min_range=1, + max_range=5, + angle_increment=5, + occlusion_enabled=False, + ): """ fov (float): angle between the start and end beams of one scan (degree). min_range (int or float) @@ -72,26 +83,44 @@ def __init__(self, robot_id, # For example, the fov=pi, means the range scanner scans 180 degrees # in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360].""" self._fov_left = (0, self.fov / 2) - self._fov_right = (2*math.pi - self.fov/2, 2*math.pi) + self._fov_right = (2 * math.pi - self.fov / 2, 2 * math.pi) # beams that are actually within the fov (set of angles) - self._beams = {round(th, 2) - for th in np.linspace(self._fov_left[0], - self._fov_left[1], - int(round((self._fov_left[1] - self._fov_left[0]) / self.angle_increment)))}\ - | {round(th, 2) - for th in np.linspace(self._fov_right[0], - self._fov_right[1], - int(round((self._fov_right[1] - self._fov_right[0]) / self.angle_increment)))} + self._beams = { + round(th, 2) + for th in np.linspace( + self._fov_left[0], + self._fov_left[1], + int( + round( + (self._fov_left[1] - self._fov_left[0]) / self.angle_increment + ) + ), + ) + } | { + round(th, 2) + for th in np.linspace( + self._fov_right[0], + self._fov_right[1], + int( + round( + (self._fov_right[1] - self._fov_right[0]) / self.angle_increment + ) + ), + ) + } # The size of the sensing region here is the area covered by the fan - self._sensing_region_size = self.fov / (2*math.pi) * math.pi * (max_range - min_range)**2 + self._sensing_region_size = ( + self.fov / (2 * math.pi) * math.pi * (max_range - min_range) ** 2 + ) def in_field_of_view(th, view_angles): """Determines if the beame at angle `th` is in a field of view of size `view_angles`. For example, the view_angles=180, means the range scanner scans 180 degrees - in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360].""" + in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360]. + """ fov_right = (0, view_angles / 2) - fov_left = (2*math.pi - view_angles/2, 2*math.pi) + fov_left = (2 * math.pi - view_angles / 2, 2 * math.pi) def within_range(self, robot_pose, point): """Returns true if the point is within range of the sensor; but the point might not @@ -99,8 +128,9 @@ def within_range(self, robot_pose, point): dist, bearing = self.shoot_beam(robot_pose, point) if not in_range(dist, (self.min_range, self.max_range)): return False - if (not in_range(bearing, self._fov_left))\ - and (not in_range(bearing, self._fov_right)): + if (not in_range(bearing, self._fov_left)) and ( + not in_range(bearing, self._fov_right) + ): return False return True @@ -108,16 +138,21 @@ def shoot_beam(self, robot_pose, point): """Shoots a beam from robot_pose at point. Returns the distance and bearing of the beame (i.e. 
the length and orientation of the beame)""" rx, ry, rth = robot_pose - dist = euclidean_dist(point, (rx,ry)) - bearing = (math.atan2(point[1] - ry, point[0] - rx) - rth) % (2*math.pi) # bearing (i.e. orientation) + dist = euclidean_dist(point, (rx, ry)) + bearing = (math.atan2(point[1] - ry, point[0] - rx) - rth) % ( + 2 * math.pi + ) # bearing (i.e. orientation) return (dist, bearing) def valid_beam(self, dist, bearing): """Returns true beam length (i.e. `dist`) is within range and its angle `bearing` is valid, that is, it is within the fov range and in accordance with the angle increment.""" - return dist >= self.min_range and dist <= self.max_range\ + return ( + dist >= self.min_range + and dist <= self.max_range and round(bearing, 2) in self._beams + ) def _build_beam_map(self, beam, point, beam_map={}): """beam_map (dict): Maps from bearing to (dist, point)""" @@ -125,7 +160,7 @@ def _build_beam_map(self, beam, point, beam_map={}): valid = self.valid_beam(dist, bearing) if not valid: return - bearing_key = round(bearing,2) + bearing_key = round(bearing, 2) if bearing_key in beam_map: # There's an object covered by this beame already. # see if this beame is closer @@ -180,9 +215,8 @@ def sensing_region_size(self): class ProximitySensor(Laser2DSensor): """This is a simple sensor; Observes a region centered at the robot.""" - def __init__(self, robot_id, - radius=5, - occlusion_enabled=False): + + def __init__(self, robot_id, radius=5, occlusion_enabled=False): """ radius (int or float) radius of the sensing region. """ @@ -197,9 +231,11 @@ def __init__(self, robot_id, angle_increment = 5 else: angle_increment = 0.25 - super().__init__(robot_id, - fov=360, - min_range=0.1, - max_range=radius, - angle_increment=angle_increment, - occlusion_enabled=occlusion_enabled) + super().__init__( + robot_id, + fov=360, + min_range=0.1, + max_range=radius, + angle_increment=angle_increment, + occlusion_enabled=occlusion_enabled, + ) diff --git a/pomdp_py/problems/multi_object_search/models/observation_model.py b/pomdp_py/problems/multi_object_search/models/observation_model.py index 424118e9..0ab962d2 100644 --- a/pomdp_py/problems/multi_object_search/models/observation_model.py +++ b/pomdp_py/problems/multi_object_search/models/observation_model.py @@ -25,20 +25,20 @@ from pomdp_py.problems.multi_object_search.domain.action import * from pomdp_py.problems.multi_object_search.domain.observation import * + #### Observation Models #### class MosObservationModel(pomdp_py.OOObservationModel): """Object-oriented transition model""" - def __init__(self, - dim, - sensor, - object_ids, - sigma=0.01, - epsilon=1): + + def __init__(self, dim, sensor, object_ids, sigma=0.01, epsilon=1): self.sigma = sigma self.epsilon = epsilon - observation_models = {objid: ObjectObservationModel(objid, sensor, dim, - sigma=sigma, epsilon=epsilon) - for objid in object_ids} + observation_models = { + objid: ObjectObservationModel( + objid, sensor, dim, sigma=sigma, epsilon=epsilon + ) + for objid in object_ids + } pomdp_py.OOObservationModel.__init__(self, observation_models) def sample(self, next_state, action, argmax=False, **kwargs): @@ -51,6 +51,7 @@ def sample(self, next_state, action, argmax=False, **kwargs): factored_observations = super().sample(next_state, action, argmax=argmax) return MosOOObservation.merge(factored_observations, next_state) + class ObjectObservationModel(pomdp_py.ObservationModel): def __init__(self, objid, sensor, dim, sigma=0, epsilon=1): """ @@ -97,13 +98,15 @@ def probability(self, observation, 
next_state, action, **kwargs): # The (funny) business of allowing histogram belief update using O(oi|si',sr',a). next_robot_state = kwargs.get("next_robot_state", None) if next_robot_state is not None: - assert next_robot_state["id"] == self._sensor.robot_id,\ - "Robot id of observation model mismatch with given state" + assert ( + next_robot_state["id"] == self._sensor.robot_id + ), "Robot id of observation model mismatch with given state" robot_pose = next_robot_state.pose if isinstance(next_state, ObjectState): - assert next_state["id"] == self._objid,\ - "Object id of observation model mismatch with given state" + assert ( + next_state["id"] == self._objid + ), "Object id of observation model mismatch with given state" object_pose = next_state.pose else: object_pose = next_state.pose(self._objid) @@ -113,7 +116,9 @@ def probability(self, observation, next_state, action, **kwargs): # Compute the probability zi = observation.pose - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) # Requires Python >= 3.6 prob = 0.0 @@ -124,9 +129,9 @@ def probability(self, observation, next_state, action, **kwargs): # This has 0.0 probability. prob += 0.0 * alpha else: - gaussian = pomdp_py.Gaussian(list(object_pose), - [[self.sigma**2, 0], - [0, self.sigma**2]]) + gaussian = pomdp_py.Gaussian( + list(object_pose), [[self.sigma**2, 0], [0, self.sigma**2]] + ) prob += gaussian[zi] * alpha # Event B @@ -137,7 +142,6 @@ def probability(self, observation, next_state, action, **kwargs): prob += pr_c * gamma return prob - def sample(self, next_state, action, **kwargs): """Returns observation""" if not isinstance(action, LookAction): @@ -148,21 +152,25 @@ def sample(self, next_state, action, **kwargs): object_pose = next_state.pose(self._objid) # Obtain observation according to distribution. - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) # Requires Python >= 3.6 - event_occured = random.choices(["A", "B", "C"], weights=[alpha, beta, gamma], k=1)[0] + event_occured = random.choices( + ["A", "B", "C"], weights=[alpha, beta, gamma], k=1 + )[0] zi = self._sample_zi(event_occured, next_state) return ObjectObservation(self._objid, zi) def argmax(self, next_state, action, **kwargs): # Obtain observation according to distribution. - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) - event_probs = {"A": alpha, - "B": beta, - "C": gamma} + event_probs = {"A": alpha, "B": beta, "C": gamma} event_occured = max(event_probs, key=lambda e: event_probs[e]) zi = self._sample_zi(event_occured, next_state, argmax=True) return ObjectObservation(self._objid, zi) @@ -170,9 +178,9 @@ def argmax(self, next_state, action, **kwargs): def _sample_zi(self, event, next_state, argmax=False): if event == "A": object_true_pose = next_state.object_pose(self._objid) - gaussian = pomdp_py.Gaussian(list(object_true_pose), - [[self.sigma**2, 0], - [0, self.sigma**2]]) + gaussian = pomdp_py.Gaussian( + list(object_true_pose), [[self.sigma**2, 0], [0, self.sigma**2]] + ) if not argmax: zi = gaussian.random() else: @@ -183,23 +191,29 @@ def _sample_zi(self, event, next_state, argmax=False): # TODO: FIX. 
zi should ONLY come from the field of view. # There is currently no easy way to sample from the field of view. width, height = self._dim - zi = (random.randint(0, width), # x axis - random.randint(0, height)) # y axis - else: # event == C + zi = ( + random.randint(0, width), # x axis + random.randint(0, height), + ) # y axis + else: # event == C zi = ObjectObservation.NULL return zi ### Unit test ### def unittest(): - from ..env.env import make_laser_sensor,\ - make_proximity_sensor, equip_sensors,\ - interpret, interpret_robot_id + from ..env.env import ( + make_laser_sensor, + make_proximity_sensor, + equip_sensors, + interpret, + interpret_robot_id, + ) + # Test within search region check, # and the observation model probability and # sampling functions. - worldmap =\ - """ + worldmap = """ .......... ....T..... ......x... @@ -208,34 +222,41 @@ def unittest(): ....T..... .......... """ - #0123456789 - # 10 x 8 - worldstr = equip_sensors(worldmap, - {"r": make_laser_sensor(90, (1,5), 0.5, False)}) + # 0123456789 + # 10 x 8 + worldstr = equip_sensors(worldmap, {"r": make_laser_sensor(90, (1, 5), 0.5, False)}) env = interpret(worldstr) robot_id = interpret_robot_id("r") robot_pose = env.state.pose(robot_id) # within_range test sensor = env.sensors[robot_id] - assert sensor.within_range(robot_pose, (4,3)) == False - assert sensor.within_range(robot_pose, (5,3)) == True - assert sensor.within_range(robot_pose, (6,3)) == True - assert sensor.within_range(robot_pose, (7,2)) == True - assert sensor.within_range(robot_pose, (7,3)) == True - assert sensor.within_range(robot_pose, (4,3)) == False - assert sensor.within_range(robot_pose, (2,4)) == False - assert sensor.within_range(robot_pose, (4,1)) == False - assert sensor.within_range(robot_pose, (4,5)) == False - assert sensor.within_range(robot_pose, (0,0)) == False + assert sensor.within_range(robot_pose, (4, 3)) == False + assert sensor.within_range(robot_pose, (5, 3)) == True + assert sensor.within_range(robot_pose, (6, 3)) == True + assert sensor.within_range(robot_pose, (7, 2)) == True + assert sensor.within_range(robot_pose, (7, 3)) == True + assert sensor.within_range(robot_pose, (4, 3)) == False + assert sensor.within_range(robot_pose, (2, 4)) == False + assert sensor.within_range(robot_pose, (4, 1)) == False + assert sensor.within_range(robot_pose, (4, 5)) == False + assert sensor.within_range(robot_pose, (0, 0)) == False print(env.state) # observation model test - O0 = ObjectObservationModel(0, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O2 = ObjectObservationModel(2, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O3 = ObjectObservationModel(3, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O5 = ObjectObservationModel(5, sensor, (env.width, env.length), sigma=0.01, epsilon=1) + O0 = ObjectObservationModel( + 0, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O2 = ObjectObservationModel( + 2, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O3 = ObjectObservationModel( + 3, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O5 = ObjectObservationModel( + 5, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) z0 = O0.sample(env.state, Look) assert z0.pose == ObjectObservation.NULL @@ -249,9 +270,12 @@ def unittest(): assert O0.probability(z0, env.state, Look) == 1.0 assert O2.probability(z2, env.state, Look) == 1.0 assert O3.probability(z3, env.state, Look) >= 1.0 - assert O3.probability(ObjectObservation(3, ObjectObservation.NULL), - env.state, Look) == 0.0 
+ assert ( + O3.probability(ObjectObservation(3, ObjectObservation.NULL), env.state, Look) + == 0.0 + ) assert O5.probability(z5, env.state, Look) == 1.0 + if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/multi_object_search/models/policy_model.py b/pomdp_py/problems/multi_object_search/models/policy_model.py index d0011576..f59a0118 100644 --- a/pomdp_py/problems/multi_object_search/models/policy_model.py +++ b/pomdp_py/problems/multi_object_search/models/policy_model.py @@ -7,6 +7,7 @@ import random from pomdp_py.problems.multi_object_search.domain.action import * + class PolicyModel(pomdp_py.RolloutPolicy): """Simple policy model. All actions are possible at any state.""" @@ -38,10 +39,9 @@ def get_all_actions(self, state=None, history=None): return ALL_MOTION_ACTIONS | {Look} | find_action else: if self._grid_map is not None: - valid_motions =\ - self._grid_map.valid_motions(self.robot_id, - state.pose(self.robot_id), - ALL_MOTION_ACTIONS) + valid_motions = self._grid_map.valid_motions( + self.robot_id, state.pose(self.robot_id), ALL_MOTION_ACTIONS + ) return valid_motions | {Look} | find_action else: return ALL_MOTION_ACTIONS | {Look} | find_action diff --git a/pomdp_py/problems/multi_object_search/models/reward_model.py b/pomdp_py/problems/multi_object_search/models/reward_model.py index 4adc31bc..98ad7ddb 100644 --- a/pomdp_py/problems/multi_object_search/models/reward_model.py +++ b/pomdp_py/problems/multi_object_search/models/reward_model.py @@ -3,6 +3,7 @@ import pomdp_py from pomdp_py.problems.multi_object_search.domain.action import * + class MosRewardModel(pomdp_py.RewardModel): def __init__(self, target_objects, big=1000, small=1, robot_id=None): """ @@ -15,14 +16,15 @@ def __init__(self, target_objects, big=1000, small=1, robot_id=None): self.small = small self._target_objects = target_objects - def probability(self, reward, state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): if reward == self._reward_func(state, action): return 1.0 else: return 0.0 - def sample(self, state, action, next_state, - normalized=False, robot_id=None): + def sample(self, state, action, next_state, normalized=False, robot_id=None): # deterministic return self._reward_func(state, action, next_state, robot_id=robot_id) @@ -30,21 +32,25 @@ def argmax(self, state, action, next_state, normalized=False, robot_id=None): """Returns the most likely reward""" return self._reward_func(state, action, next_state, robot_id=robot_id) + class GoalRewardModel(MosRewardModel): """ This is a reward where the agent gets reward only for detect-related actions. """ + def _reward_func(self, state, action, next_state, robot_id=None): if robot_id is None: - assert self._robot_id is not None,\ - "Reward must be computed with respect to one robot." + assert ( + self._robot_id is not None + ), "Reward must be computed with respect to one robot." robot_id = self._robot_id reward = 0 # If the robot has detected all objects - if len(state.object_states[robot_id]['objects_found'])\ - == len(self._target_objects): + if len(state.object_states[robot_id]["objects_found"]) == len( + self._target_objects + ): return 0 # no reward or penalty; the task is finished. 
if isinstance(action, MotionAction): @@ -52,13 +58,15 @@ def _reward_func(self, state, action, next_state, robot_id=None): elif isinstance(action, LookAction): reward = reward - self.small elif isinstance(action, FindAction): - if state.object_states[robot_id]['camera_direction'] is None: + if state.object_states[robot_id]["camera_direction"] is None: # The robot didn't look before detect. So nothing is in the field of view. reward -= self.big else: # transition function should've taken care of the detection. - new_objects_count = len(set(next_state.object_states[robot_id].objects_found)\ - - set(state.object_states[robot_id].objects_found)) + new_objects_count = len( + set(next_state.object_states[robot_id].objects_found) + - set(state.object_states[robot_id].objects_found) + ) if new_objects_count == 0: # No new detection. "detect" is a bad action. reward -= self.big diff --git a/pomdp_py/problems/multi_object_search/models/transition_model.py b/pomdp_py/problems/multi_object_search/models/transition_model.py index 4b0d28de..92e97c8a 100644 --- a/pomdp_py/problems/multi_object_search/models/transition_model.py +++ b/pomdp_py/problems/multi_object_search/models/transition_model.py @@ -8,12 +8,14 @@ Transition: deterministic """ + import pomdp_py import copy from pomdp_py.problems.multi_object_search.domain.state import * from pomdp_py.problems.multi_object_search.domain.observation import * from pomdp_py.problems.multi_object_search.domain.action import * + ####### Transition Model ####### class MosTransitionModel(pomdp_py.OOTransitionModel): """Object-oriented transition model; The transition model supports the @@ -21,22 +23,23 @@ class MosTransitionModel(pomdp_py.OOTransitionModel): multi-robot transition model should be used by the Environment, but not necessarily by each robot for planning. """ - def __init__(self, - dim, sensors, object_ids, - epsilon=1e-9): + + def __init__(self, dim, sensors, object_ids, epsilon=1e-9): """ sensors (dict): robot_id -> Sensor for_env (bool): True if this is a robot transition model used by the Environment. see RobotTransitionModel for details. 
""" self._sensors = sensors - transition_models = {objid: StaticObjectTransitionModel(objid, epsilon=epsilon) - for objid in object_ids - if objid not in sensors} + transition_models = { + objid: StaticObjectTransitionModel(objid, epsilon=epsilon) + for objid in object_ids + if objid not in sensors + } for robot_id in sensors: - transition_models[robot_id] = RobotTransitionModel(sensors[robot_id], - dim, - epsilon=epsilon) + transition_models[robot_id] = RobotTransitionModel( + sensors[robot_id], dim, epsilon=epsilon + ) super().__init__(transition_models) def sample(self, state, action, **kwargs): @@ -47,14 +50,16 @@ def argmax(self, state, action, normalized=False, **kwargs): oostate = pomdp_py.OOTransitionModel.argmax(self, state, action, **kwargs) return MosOOState(oostate.object_states) + class StaticObjectTransitionModel(pomdp_py.TransitionModel): """This model assumes the object is static.""" + def __init__(self, objid, epsilon=1e-9): self._objid = objid self._epsilon = epsilon def probability(self, next_object_state, state, action): - if next_object_state != state.object_states[next_object_state['id']]: + if next_object_state != state.object_states[next_object_state["id"]]: return self._epsilon else: return 1.0 - self._epsilon @@ -70,6 +75,7 @@ def argmax(self, state, action): class RobotTransitionModel(pomdp_py.TransitionModel): """We assume that the robot control is perfect and transitions are deterministic.""" + def __init__(self, sensor, dim, epsilon=1e-9): """ dim (tuple): a tuple (width, length) for the dimension of the world @@ -81,8 +87,7 @@ def __init__(self, sensor, dim, epsilon=1e-9): self._epsilon = epsilon @classmethod - def if_move_by(cls, robot_id, state, action, dim, - check_collision=True): + def if_move_by(cls, robot_id, state, action, dim, check_collision=True): """Defines the dynamics of robot motion; dim (tuple): the width, length of the search world.""" if not isinstance(action, MotionAction): @@ -99,15 +104,18 @@ def if_move_by(cls, robot_id, state, action, dim, # odometry motion model forward, angle = action.motion rth += angle # angle (radian) - rx = int(round(rx + forward*math.cos(rth))) - ry = int(round(ry + forward*math.sin(rth))) - rth = rth % (2*math.pi) - - if valid_pose((rx, ry, rth), - dim[0], dim[1], - state=state, - check_collision=check_collision, - pose_objid=robot_id): + rx = int(round(rx + forward * math.cos(rth))) + ry = int(round(ry + forward * math.sin(rth))) + rth = rth % (2 * math.pi) + + if valid_pose( + (rx, ry, rth), + dim[0], + dim[1], + state=state, + check_collision=check_collision, + pose_objid=robot_id, + ): return (rx, ry, rth) else: return robot_pose # no change because change results in invalid pose @@ -127,29 +135,34 @@ def argmax(self, state, action): next_robot_state = copy.deepcopy(robot_state) # camera direction is only not None when looking - next_robot_state['camera_direction'] = None + next_robot_state["camera_direction"] = None if isinstance(action, MotionAction): # motion action - next_robot_state['pose'] = \ - RobotTransitionModel.if_move_by(self._robot_id, - state, action, self._dim) + next_robot_state["pose"] = RobotTransitionModel.if_move_by( + self._robot_id, state, action, self._dim + ) elif isinstance(action, LookAction): if hasattr(action, "motion") and action.motion is not None: # rotate the robot - next_robot_state['pose'] = \ - self._if_move_by(self._robot_id, - state, action, self._dim) - next_robot_state['camera_direction'] = action.name + next_robot_state["pose"] = self._if_move_by( + self._robot_id, 
state, action, self._dim
+                )
+                next_robot_state["camera_direction"] = action.name
         elif isinstance(action, FindAction):
             robot_pose = state.pose(self._robot_id)
             z = self._sensor.observe(robot_pose, state)
             # Update "objects_found" set for target objects
-            observed_target_objects = {objid
-                                       for objid in z.objposes
-                                       if (state.object_states[objid].objclass == "target"\
-                                           and z.objposes[objid] != ObjectObservation.NULL)}
-            next_robot_state["objects_found"] = tuple(set(next_robot_state['objects_found'])\
-                                                      | set(observed_target_objects))
+            observed_target_objects = {
+                objid
+                for objid in z.objposes
+                if (
+                    state.object_states[objid].objclass == "target"
+                    and z.objposes[objid] != ObjectObservation.NULL
+                )
+            }
+            next_robot_state["objects_found"] = tuple(
+                set(next_robot_state["objects_found"]) | set(observed_target_objects)
+            )
         return next_robot_state

     def sample(self, state, action):
@@ -173,19 +186,19 @@ def valid_pose(pose, width, length, state=None, check_collision=True, pose_objid
         if state.object_states[objid].objclass.startswith("obstacle"):
             if objid == pose_objid:
                 continue
-            if (x,y) == object_poses[objid]:
+            if (x, y) == object_poses[objid]:
                 return False
     return in_boundary(pose, width, length)


 def in_boundary(pose, width, length):
     # Check if in boundary
-    x,y = pose[:2]
+    x, y = pose[:2]
     if x >= 0 and x < width:
         if y >= 0 and y < length:
             if len(pose) == 3:
                 th = pose[2]  # radian
-                if th < 0 or th > 2*math.pi:
+                if th < 0 or th > 2 * math.pi:
                     return False
             return True
     return False
diff --git a/pomdp_py/problems/multi_object_search/problem.py b/pomdp_py/problems/multi_object_search/problem.py
index c62f6582..b3926b89 100644
--- a/pomdp_py/problems/multi_object_search/problem.py
+++ b/pomdp_py/problems/multi_object_search/problem.py
@@ -2,6 +2,7 @@
 Uses the domain, models, and agent/environment
 to actually define the POMDP problem for multi-object search.
 Then, solve it using POUCT or POMCP."""
+
 import pomdp_py
 from pomdp_py.problems.multi_object_search.env.env import *
 from pomdp_py.problems.multi_object_search.env.visual import *
@@ -13,6 +14,7 @@
 import time
 import random

+
 class MosOOPOMDP(pomdp_py.OOPOMDP):
     """
     A MosOOPOMDP is instantiated given a string description
@@ -27,10 +29,20 @@ class MosOOPOMDP(pomdp_py.OOPOMDP):
     could construct an Environment object and give None to
     the object poses.
     """
-    def __init__(self, robot_id, env=None, grid_map=None,
-                 sensors=None, sigma=0.01, epsilon=1,
-                 belief_rep="histogram", prior={}, num_particles=100,
-                 agent_has_map=False):
+
+    def __init__(
+        self,
+        robot_id,
+        env=None,
+        grid_map=None,
+        sensors=None,
+        sigma=0.01,
+        epsilon=1,
+        belief_rep="histogram",
+        prior={},
+        num_particles=100,
+        agent_has_map=False,
+    ):
         """
         Args:
             robot_id (int or str): the id of the agent that will solve this MosOOPOMDP.
@@ -59,15 +71,14 @@ def __init__(self, robot_id, env=None, grid_map=None,
             num_particles (int): setting for the particle belief representation
         """
         if env is None:
-            assert grid_map is not None and sensors is not None,\
-                "Since env is not provided, you must provide string descriptions"\
+            assert grid_map is not None and sensors is not None, (
+                "Since env is not provided, you must provide string descriptions "
                 "of the world and sensors."
+            )
             worldstr = equip_sensors(grid_map, sensors)
             dim, robots, objects, obstacles, sensors = interpret(worldstr)
             init_state = MosOOState({**objects, **robots})
-            env = MosEnvironment(dim,
-                                 init_state, sensors,
-                                 obstacles=obstacles)
+            env = MosEnvironment(dim, init_state, sensors, obstacles=obstacles)

         # construct prior
         if type(prior) == str:
@@ -84,22 +95,33 @@
         # only defined over a single agent. Perhaps, MultiAgent is just a kind
         # of Agent, which will make the implementation of multi-agent POMDP cleaner.
         robot_id = robot_id if type(robot_id) == int else interpret_robot_id(robot_id)
-        grid_map = GridMap(env.width, env.length,
-                           {objid: env.state.pose(objid)
-                            for objid in env.obstacles}) if agent_has_map else None
-        agent = MosAgent(robot_id,
-                         env.state.object_states[robot_id],
-                         env.target_objects,
-                         (env.width, env.length),
-                         env.sensors[robot_id],
-                         sigma=sigma,
-                         epsilon=epsilon,
-                         belief_rep=belief_rep,
-                         prior=prior,
-                         num_particles=num_particles,
-                         grid_map=grid_map)
-        super().__init__(agent, env,
-                         name="MOS(%d,%d,%d)" % (env.width, env.length, len(env.target_objects)))
+        grid_map = (
+            GridMap(
+                env.width,
+                env.length,
+                {objid: env.state.pose(objid) for objid in env.obstacles},
+            )
+            if agent_has_map
+            else None
+        )
+        agent = MosAgent(
+            robot_id,
+            env.state.object_states[robot_id],
+            env.target_objects,
+            (env.width, env.length),
+            env.sensors[robot_id],
+            sigma=sigma,
+            epsilon=epsilon,
+            belief_rep=belief_rep,
+            prior=prior,
+            num_particles=num_particles,
+            grid_map=grid_map,
+        )
+        super().__init__(
+            agent,
+            env,
+            name="MOS(%d,%d,%d)" % (env.width, env.length, len(env.target_objects)),
+        )


 ### Belief Update ###
@@ -146,31 +168,37 @@ def belief_update(agent, real_action, real_observation, next_robot_state, planne
         #   observation model from O(oi|s',a) to O(label_i|s',a) and
         #   it becomes easy to define O(label_i=i|s',a) and O(label_i=FREE|s',a).
         #   These ideas are used in my recent 3D object search work.
-            new_belief = pomdp_py.update_histogram_belief(belief_obj,
-                                                          real_action,
-                                                          real_observation.for_obj(objid),
-                                                          agent.observation_model[objid],
-                                                          agent.transition_model[objid],
-                                                          # The agent knows the objects are static.
-                                                          static_transition=objid != agent.robot_id,
-                                                          oargs={"next_robot_state": next_robot_state})
+            new_belief = pomdp_py.update_histogram_belief(
+                belief_obj,
+                real_action,
+                real_observation.for_obj(objid),
+                agent.observation_model[objid],
+                agent.transition_model[objid],
+                # The agent knows the objects are static.
+                static_transition=objid != agent.robot_id,
+                oargs={"next_robot_state": next_robot_state},
+            )
         else:
-            raise ValueError("Unexpected program state."\
-                             "Are you using the appropriate belief representation?")
+            raise ValueError(
+                "Unexpected program state. "
+                "Are you using the appropriate belief representation?"
+            )

         agent.cur_belief.set_object_belief(objid, new_belief)


 ### Solve the problem with POUCT/POMCP planner ###
 ### This is the main online POMDP solver logic ###
-def solve(problem,
-          max_depth=10,  # planning horizon
-          discount_factor=0.99,
-          planning_time=1.,  # amount of time (s) to plan each step
-          exploration_const=1000,  # exploration constant
-          visualize=True,
-          max_time=120,  # maximum amount of time allowed to solve the problem
-          max_steps=500):  # maximum number of planning steps the agent can take.
+def solve( + problem, + max_depth=10, # planning horizon + discount_factor=0.99, + planning_time=1.0, # amount of time (s) to plan each step + exploration_const=1000, # exploration constant + visualize=True, + max_time=120, # maximum amount of time allowed to solve the problem + max_steps=500, +): # maximum number of planning steps the agent can take. """ This function terminates when: - maximum time (max_time) reached; This time includes planning and updates @@ -185,31 +213,35 @@ def solve(problem, random_object_belief = problem.agent.belief.object_beliefs[random_objid] if isinstance(random_object_belief, pomdp_py.Histogram): # Use POUCT - planner = pomdp_py.POUCT(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) # Random by default + planner = pomdp_py.POUCT( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) # Random by default elif isinstance(random_object_belief, pomdp_py.Particles): # Use POMCP - planner = pomdp_py.POMCP(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) # Random by default + planner = pomdp_py.POMCP( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) # Random by default else: - raise ValueError("Unsupported object belief type %s" % str(type(random_object_belief))) + raise ValueError( + "Unsupported object belief type %s" % str(type(random_object_belief)) + ) robot_id = problem.agent.robot_id if visualize: - viz = MosViz(problem.env, controllable=False) # controllable=False means no keyboard control. + viz = MosViz( + problem.env, controllable=False + ) # controllable=False means no keyboard control. if viz.on_init() == False: raise Exception("Environment failed to initialize") - viz.update(robot_id, - None, - None, - None, - problem.agent.cur_belief) + viz.update(robot_id, None, None, None, problem.agent.cur_belief) viz.on_render() _time_used = 0 @@ -224,32 +256,38 @@ def solve(problem, break # no more time to update. 
# Execute action - reward = problem.env.state_transition(real_action, execute=True, - robot_id=robot_id) + reward = problem.env.state_transition( + real_action, execute=True, robot_id=robot_id + ) # Receive observation _start = time.time() - real_observation = \ - problem.env.provide_observation(problem.agent.observation_model, real_action) + real_observation = problem.env.provide_observation( + problem.agent.observation_model, real_action + ) # Updates problem.agent.clear_history() # truncate history problem.agent.update_history(real_action, real_observation) - belief_update(problem.agent, real_action, real_observation, - problem.env.state.object_states[robot_id], - planner) + belief_update( + problem.agent, + real_action, + real_observation, + problem.env.state.object_states[robot_id], + planner, + ) _time_used += time.time() - _start # Info and render _total_reward += reward if isinstance(real_action, FindAction): _find_actions_count += 1 - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print("Action: %s" % str(real_action)) print("Observation: %s" % str(real_observation)) print("Reward: %s" % str(reward)) print("Reward (Cumulative): %s" % str(_total_reward)) - print("Find Actions Count: %d" % _find_actions_count) + print("Find Actions Count: %d" % _find_actions_count) if isinstance(planner, pomdp_py.POUCT): print("__num_sims__: %d" % planner.last_num_sims) @@ -258,21 +296,27 @@ def solve(problem, # according to observation model. robot_pose = problem.env.state.object_states[robot_id].pose viz_observation = MosOOObservation({}) - if isinstance(real_action, LookAction) or isinstance(real_action, FindAction): - viz_observation = \ - problem.env.sensors[robot_id].observe(robot_pose, - problem.env.state) - viz.update(robot_id, - real_action, - real_observation, - viz_observation, - problem.agent.cur_belief) + if isinstance(real_action, LookAction) or isinstance( + real_action, FindAction + ): + viz_observation = problem.env.sensors[robot_id].observe( + robot_pose, problem.env.state + ) + viz.update( + robot_id, + real_action, + real_observation, + viz_observation, + problem.agent.cur_belief, + ) viz.on_loop() viz.on_render() # Termination check - if set(problem.env.state.object_states[robot_id].objects_found)\ - == problem.env.target_objects: + if ( + set(problem.env.state.object_states[robot_id].objects_found) + == problem.env.target_objects + ): print("Done!") break if _find_actions_count >= len(problem.env.target_objects): @@ -282,27 +326,33 @@ def solve(problem, print("Maximum time reached.") break + # Test def unittest(): # random world grid_map, robot_char = random_world(10, 10, 5, 10) laserstr = make_laser_sensor(90, (1, 4), 0.5, False) proxstr = make_proximity_sensor(4, False) - problem = MosOOPOMDP(robot_char, # r is the robot character - sigma=0.05, # observation model parameter - epsilon=0.95, # observation model parameter - grid_map=grid_map, - sensors={robot_char: proxstr}, - prior="uniform", - agent_has_map=True) - solve(problem, - max_depth=10, - discount_factor=0.99, - planning_time=1., - exploration_const=1000, - visualize=True, - max_time=120, - max_steps=500) + problem = MosOOPOMDP( + robot_char, # r is the robot character + sigma=0.05, # observation model parameter + epsilon=0.95, # observation model parameter + grid_map=grid_map, + sensors={robot_char: proxstr}, + prior="uniform", + agent_has_map=True, + ) + solve( + problem, + max_depth=10, + discount_factor=0.99, + planning_time=1.0, + exploration_const=1000, + visualize=True, + 
max_time=120, + max_steps=500, + ) + if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index 8e8036ef..c02d5efe 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -31,6 +31,7 @@ Initial belief: every rock has equal probability of being Good or Bad. """ + import pomdp_py import random import math @@ -40,26 +41,31 @@ EPSILON = 1e-9 + def euclidean_dist(p1, p2): - return math.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2) + return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) + class RockType: - GOOD = 'good' - BAD = 'bad' + GOOD = "good" + BAD = "bad" + @staticmethod def invert(rocktype): - if rocktype == 'good': - return 'bad' + if rocktype == "good": + return "bad" else: - return 'good' + return "good" # return 1 - rocktype + @staticmethod def random(p=0.5): - if random.uniform(0,1) >= p: + if random.uniform(0, 1) >= p: return RockType.GOOD else: return RockType.BAD + class State(pomdp_py.State): def __init__(self, position, rocktypes, terminal=False): """ @@ -79,11 +85,14 @@ def __init__(self, position, rocktypes, terminal=False): def __hash__(self): return hash((self.position, self.rocktypes, self.terminal)) + def __eq__(self, other): if isinstance(other, State): - return self.position == other.position\ - and self.rocktypes == other.rocktypes\ + return ( + self.position == other.position + and self.rocktypes == other.rocktypes and self.terminal == other.terminal + ) else: return False @@ -91,20 +100,29 @@ def __str__(self): return self.__repr__() def __repr__(self): - return "State(%s | %s | %s)" % (str(self.position), str(self.rocktypes), str(self.terminal)) + return "State(%s | %s | %s)" % ( + str(self.position), + str(self.rocktypes), + str(self.terminal), + ) + class Action(pomdp_py.Action): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, Action): return self.name == other.name elif type(other) == str: return self.name == other + def __str__(self): return self.name + def __repr__(self): return "Action(%s)" % self.name @@ -114,62 +132,77 @@ class MoveAction(Action): WEST = (-1, 0) NORTH = (0, -1) SOUTH = (0, 1) + def __init__(self, motion, name): - if motion not in {MoveAction.EAST, MoveAction.WEST, - MoveAction.NORTH, MoveAction.SOUTH}: + if motion not in { + MoveAction.EAST, + MoveAction.WEST, + MoveAction.NORTH, + MoveAction.SOUTH, + }: raise ValueError("Invalid move motion %s" % motion) self.motion = motion super().__init__("move-%s" % str(name)) + MoveEast = MoveAction(MoveAction.EAST, "EAST") MoveWest = MoveAction(MoveAction.WEST, "WEST") MoveNorth = MoveAction(MoveAction.NORTH, "NORTH") MoveSouth = MoveAction(MoveAction.SOUTH, "SOUTH") + class SampleAction(Action): def __init__(self): super().__init__("sample") + class CheckAction(Action): def __init__(self, rock_id): self.rock_id = rock_id super().__init__("check-%d" % self.rock_id) + class Observation(pomdp_py.Observation): def __init__(self, quality): self.quality = quality + def __hash__(self): return hash(self.quality) + def __eq__(self, other): if isinstance(other, Observation): return self.quality == other.quality elif type(other) == str: return self.quality == other + def __str__(self): return str(self.quality) + def __repr__(self): return "Observation(%s)" % str(self.quality) -class RSTransitionModel(pomdp_py.TransitionModel): - """ The model is 
deterministic """ +class RSTransitionModel(pomdp_py.TransitionModel): + """The model is deterministic""" def __init__(self, n, rock_locs, in_exit_area): """ rock_locs: a map from (x,y) location to rock_id - in_exit_area: a function (x,y) -> Bool that returns True if (x,y) is in exit area""" + in_exit_area: a function (x,y) -> Bool that returns True if (x,y) is in exit area + """ self._n = n self._rock_locs = rock_locs self._in_exit_area = in_exit_area def _move_or_exit(self, position, action): - expected = (position[0] + action.motion[0], - position[1] + action.motion[1]) + expected = (position[0] + action.motion[0], position[1] + action.motion[1]) if self._in_exit_area(expected): return expected, True else: - return (max(0, min(position[0] + action.motion[0], self._n-1)), - max(0, min(position[1] + action.motion[1], self._n-1))), False + return ( + max(0, min(position[0] + action.motion[0], self._n - 1)), + max(0, min(position[1] + action.motion[1], self._n - 1)), + ), False def probability(self, next_state, state, action, normalized=False, **kwargs): if next_state != self.sample(state, action): @@ -205,7 +238,7 @@ def argmax(self, state, action): class RSObservationModel(pomdp_py.ObservationModel): def __init__(self, rock_locs, half_efficiency_dist=20): self._half_efficiency_dist = half_efficiency_dist - self._rocks = {rock_locs[pos]:pos for pos in rock_locs} + self._rocks = {rock_locs[pos]: pos for pos in rock_locs} def probability(self, observation, next_state, action): if isinstance(action, CheckAction): @@ -283,12 +316,15 @@ def sample(self, state, action, next_state, normalized=False, **kwargs): def argmax(self, state, action, next_state, normalized=False, **kwargs): raise NotImplementedError - def probability(self, reward, state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): raise NotImplementedError class RSPolicyModel(pomdp_py.RolloutPolicy): """Simple policy model according to problem description.""" + def __init__(self, n, k): check_actions = set({CheckAction(rock_id) for rock_id in range(k)}) self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth} @@ -326,13 +362,11 @@ def rollout(self, state, history=None): class RockSampleProblem(pomdp_py.POMDP): - @staticmethod def random_free_location(n, not_free_locs): """returns a random (x,y) location in nxn grid that is free.""" while True: - loc = (random.randint(0, n-1), - random.randint(0, n-1)) + loc = (random.randint(0, n - 1), random.randint(0, n - 1)) if loc not in not_free_locs: return loc @@ -343,10 +377,12 @@ def in_exit_area(self, pos): def generate_instance(n, k): """Returns init_state and rock locations for an instance of RockSample(n,k)""" - rover_position = (0, random.randint(0, n-1)) + rover_position = (0, random.randint(0, n - 1)) rock_locs = {} # map from rock location to rock id for i in range(k): - loc = RockSampleProblem.random_free_location(n, set(rock_locs.keys()) | set({rover_position})) + loc = RockSampleProblem.random_free_location( + n, set(rock_locs.keys()) | set({rover_position}) + ) rock_locs[loc] = i # random rocktypes @@ -363,29 +399,29 @@ def print_state(self): rocktypes = self.env.state.rocktypes # Rock id map for y in range(self._n): - for x in range(self._n+1): + for x in range(self._n + 1): char = "." 
if x == self._n: char = ">" - if (x,y) in self._rock_locs: - char = str(self._rock_locs[(x,y)]) - if (x,y) == rover_position: + if (x, y) in self._rock_locs: + char = str(self._rock_locs[(x, y)]) + if (x, y) == rover_position: char = "R" string += char string += "\n" string += "_____G/B_____\n" # Good/bad map for y in range(self._n): - for x in range(self._n+1): + for x in range(self._n + 1): char = "." if x == self._n: char = ">" - if (x,y) in self._rock_locs: - if rocktypes[self._rock_locs[(x,y)]] == RockType.GOOD: + if (x, y) in self._rock_locs: + if rocktypes[self._rock_locs[(x, y)]] == RockType.GOOD: char = "$" else: char = "x" - if (x,y) == rover_position: + if (x, y) == rover_position: char = "R" string += char string += "\n" @@ -393,14 +429,18 @@ def print_state(self): def __init__(self, n, k, init_state, rock_locs, init_belief): self._n, self._k = n, k - agent = pomdp_py.Agent(init_belief, - RSPolicyModel(n,k), - RSTransitionModel(n, rock_locs, self.in_exit_area), - RSObservationModel(rock_locs), - RSRewardModel(rock_locs, self.in_exit_area)) - env = pomdp_py.Environment(init_state, - RSTransitionModel(n, rock_locs, self.in_exit_area), - RSRewardModel(rock_locs, self.in_exit_area)) + agent = pomdp_py.Agent( + init_belief, + RSPolicyModel(n, k), + RSTransitionModel(n, rock_locs, self.in_exit_area), + RSObservationModel(rock_locs), + RSRewardModel(rock_locs, self.in_exit_area), + ) + env = pomdp_py.Environment( + init_state, + RSTransitionModel(n, rock_locs, self.in_exit_area), + RSRewardModel(rock_locs, self.in_exit_area), + ) self._rock_locs = rock_locs super().__init__(agent, env, name="RockSampleProblem") @@ -410,7 +450,7 @@ def test_planner(rocksample, planner, nsteps=3, discount=0.95): total_reward = 0 total_discounted_reward = 0 for i in range(nsteps): - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) action = planner.plan(rocksample.agent) # pomdp_py.visual.visualize_pouct_search_tree(rocksample.agent.tree, # max_depth=5, anonymize=False) @@ -419,8 +459,9 @@ def test_planner(rocksample, planner, nsteps=3, discount=0.95): env_reward = rocksample.env.state_transition(action, execute=True) true_next_state = copy.deepcopy(rocksample.env.state) - real_observation = rocksample.env.provide_observation(rocksample.agent.observation_model, - action) + real_observation = rocksample.env.provide_observation( + rocksample.agent.observation_model, action + ) rocksample.agent.update_history(action, real_observation) planner.update(rocksample.agent, action, real_observation) total_reward += env_reward @@ -460,6 +501,7 @@ def init_particles_belief(k, num_particles, init_state, belief="uniform"): init_belief = pomdp_py.Particles(particles) return init_belief + def main(): n, k = 5, 5 init_state, rock_locs = RockSampleProblem.generate_instance(n, k) @@ -486,10 +528,14 @@ def main(): rocksample.print_state() print("*** Testing POMCP ***") - pomcp = pomdp_py.POMCP(max_depth=20, discount_factor=0.95, - num_sims=10000, exploration_const=20, - rollout_policy=rocksample.agent.policy_model, - num_visits_init=1) + pomcp = pomdp_py.POMCP( + max_depth=20, + discount_factor=0.95, + num_sims=10000, + exploration_const=20, + rollout_policy=rocksample.agent.policy_model, + num_visits_init=1, + ) tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) rocksample.env.state.position = init_state_copy.position @@ -499,5 +545,5 @@ def main(): rocksample.agent.set_belief(init_belief) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git 
a/pomdp_py/problems/tag/__init__.py b/pomdp_py/problems/tag/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/__init__.py +++ b/pomdp_py/problems/tag/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/agent/agent.py b/pomdp_py/problems/tag/agent/agent.py index dd84a67b..1a166d9b 100644 --- a/pomdp_py/problems/tag/agent/agent.py +++ b/pomdp_py/problems/tag/agent/agent.py @@ -11,6 +11,7 @@ from pomdp_py.problems.tag.models.components.motion_policy import * from pomdp_py.problems.tag.models.components.grid_map import * + ## initialize belief def initialize_belief(grid_map, init_robot_position, prior={}): """Initialize belief. @@ -28,12 +29,12 @@ def initialize_belief(grid_map, init_robot_position, prior={}): for x in range(grid_map.width): for y in range(grid_map.length): - if (x,y) in grid_map.obstacle_poses: + if (x, y) in grid_map.obstacle_poses: # Skip obstacles continue - state = TagState(init_robot_position, (x,y), False) + state = TagState(init_robot_position, (x, y), False) if len(prior) > 0: - if (x,y) not in prior: + if (x, y) not in prior: hist[state] = 1e-9 else: hist[state] = 1.0 @@ -45,7 +46,10 @@ def initialize_belief(grid_map, init_robot_position, prior={}): hist_belief = pomdp_py.Histogram(hist) return hist_belief -def initialize_particles_belief(grid_map, init_robot_position, num_particles=100, prior={}): + +def initialize_particles_belief( + grid_map, init_robot_position, num_particles=100, prior={} +): """Initialize belief. Args: @@ -63,8 +67,10 @@ def initialize_particles_belief(grid_map, init_robot_position, num_particles=100 particles.append(state) else: while len(particles) < num_particles: - target_position = (random.randint(0, grid_map.width-1), - random.randint(0, grid_map.length-1)) + target_position = ( + random.randint(0, grid_map.width - 1), + random.randint(0, grid_map.length - 1), + ) if target_position in grid_map.obstacle_poses: # Skip obstacles continue @@ -77,7 +83,9 @@ def initialize_particles_belief(grid_map, init_robot_position, num_particles=100 def belief_update(agent, real_action, real_observation): # Update agent belief current_mpe_state = agent.cur_belief.mpe() - next_robot_position = agent.transition_model.sample(current_mpe_state, real_action).robot_position + next_robot_position = agent.transition_model.sample( + current_mpe_state, real_action + ).robot_position next_state_space = set({}) for state in agent.cur_belief: @@ -86,33 +94,32 @@ def belief_update(agent, real_action, real_observation): next_state_space.add(next_state) new_belief = pomdp_py.update_histogram_belief( - agent.cur_belief, real_action, real_observation, - agent.observation_model, agent.transition_model, - next_state_space=next_state_space) + agent.cur_belief, + real_action, + real_observation, + agent.observation_model, + agent.transition_model, + next_state_space=next_state_space, + ) agent.set_belief(new_belief) -class TagAgent(pomdp_py.Agent): - def __init__(self, - init_belief, - grid_map, - pr_stay=0.2, - small=1, - big=10): +class TagAgent(pomdp_py.Agent): + def __init__(self, init_belief, grid_map, pr_stay=0.2, small=1, big=10): self._grid_map = grid_map - target_motion_policy = TagTargetMotionPolicy(grid_map, - pr_stay) - transition_model = TagTransitionModel(grid_map, - target_motion_policy) + target_motion_policy = TagTargetMotionPolicy(grid_map, pr_stay) + transition_model = TagTransitionModel(grid_map, target_motion_policy) reward_model = TagRewardModel(small=small, big=big) observation_model = TagObservationModel() policy_model = 
TagPolicyModel(grid_map=grid_map)
-        super().__init__(init_belief,
-                         policy_model,
-                         transition_model=transition_model,
-                         observation_model=observation_model,
-                         reward_model=reward_model)
+        super().__init__(
+            init_belief,
+            policy_model,
+            transition_model=transition_model,
+            observation_model=observation_model,
+            reward_model=reward_model,
+        )

     def clear_history(self):
         """Custom function; clear history"""
diff --git a/pomdp_py/problems/tag/domain/action.py b/pomdp_py/problems/tag/domain/action.py
index 04694473..5b73eafd 100644
--- a/pomdp_py/problems/tag/domain/action.py
+++ b/pomdp_py/problems/tag/domain/action.py
@@ -6,11 +6,18 @@
 # Reuses the actions in the multi object search domain
 import pomdp_py
-from pomdp_py.problems.multi_object_search.domain.action\
-    import Action, MotionAction, MoveEast2D, MoveWest2D, MoveSouth2D, MoveNorth2D
+from pomdp_py.problems.multi_object_search.domain.action import (
+    Action,
+    MotionAction,
+    MoveEast2D,
+    MoveWest2D,
+    MoveSouth2D,
+    MoveNorth2D,
+)

 MOTION_ACTIONS = {MoveEast2D, MoveWest2D, MoveSouth2D, MoveNorth2D}

+
 class TagAction(Action):
     def __init__(self):
         super().__init__("tag")
diff --git a/pomdp_py/problems/tag/domain/observation.py b/pomdp_py/problems/tag/domain/observation.py
index 7bfe77b9..ae341f3b 100644
--- a/pomdp_py/problems/tag/domain/observation.py
+++ b/pomdp_py/problems/tag/domain/observation.py
@@ -4,8 +4,10 @@
 Observation space:
     the agent observes the target's location when the agent and the target are in the same cell.
 """
+
 import pomdp_py

+
 class TagObservation(pomdp_py.Observation):
     def __init__(self, target_position):
         self.target_position = target_position
@@ -20,5 +22,4 @@ def __eq__(self, other):
         return self.target_position == other.target_position

     def __str__(self):
-        return 'Observation(%s)' % (str(self.target_position))
-
+        return "Observation(%s)" % (str(self.target_position))
diff --git a/pomdp_py/problems/tag/domain/state.py b/pomdp_py/problems/tag/domain/state.py
index d5297a6f..91e7d1f1 100644
--- a/pomdp_py/problems/tag/domain/state.py
+++ b/pomdp_py/problems/tag/domain/state.py
@@ -4,10 +4,11 @@
 State space:
     state of the robot (x,y), state of the person (x,y), person found.
 """
+
 import pomdp_py

+
 class TagState(pomdp_py.State):
-
     def __init__(self, robot_position, target_position, target_found):
         """
         robot_position (tuple): x,y location of the robot.
@@ -25,14 +26,18 @@ def __eq__(self, other):
         if not isinstance(other, TagState):
             return False
         else:
-            return self.robot_position == other.robot_position\
-                and self.target_position == other.target_position\
+            return (
+                self.robot_position == other.robot_position
+                and self.target_position == other.target_position
                 and self.target_found == other.target_found
+            )

     def __str__(self):
-        return 'State(%s, %s | %s)' % (str(self.robot_position),
-                                       str(self.target_position),
-                                       str(self.target_found))
+        return "State(%s, %s | %s)" % (
+            str(self.robot_position),
+            str(self.target_position),
+            str(self.target_found),
+        )

     def __repr__(self):
         return str(self)
diff --git a/pomdp_py/problems/tag/env/env.py b/pomdp_py/problems/tag/env/env.py
index a66ac829..47211aff 100644
--- a/pomdp_py/problems/tag/env/env.py
+++ b/pomdp_py/problems/tag/env/env.py
@@ -7,22 +7,14 @@
 from pomdp_py.problems.multi_object_search.env.env import interpret
 from pomdp_py.problems.multi_object_search.env.visual import MosViz

-class TagEnvironment(pomdp_py.Environment):
-
-    def __init__(self,
-                 init_state,
-                 grid_map,
-                 pr_stay=0.2,
-                 small=1,
-                 big=10):
+class TagEnvironment(pomdp_py.Environment):
+    def __init__(self, init_state, grid_map, pr_stay=0.2, small=1, big=10):
         self._grid_map = grid_map
-        target_motion_policy = TagTargetMotionPolicy(grid_map,
-                                                     pr_stay)
+        target_motion_policy = TagTargetMotionPolicy(grid_map, pr_stay)
         transition_model = TagTransitionModel(grid_map, target_motion_policy)
         reward_model = TagRewardModel(small=small, big=big)
-        super().__init__(init_state,
-                         transition_model,
-                         reward_model)
+        super().__init__(init_state, transition_model, reward_model)

     @property
     def width(self):
diff --git a/pomdp_py/problems/tag/env/visual.py b/pomdp_py/problems/tag/env/visual.py
index b8ee6d05..c27c638c 100644
--- a/pomdp_py/problems/tag/env/visual.py
+++ b/pomdp_py/problems/tag/env/visual.py
@@ -1,4 +1,5 @@
 """Largely based on MosViz, except this is not an OO-POMDP"""
+
 import pygame
 import cv2
 import math
@@ -12,9 +13,9 @@
 from pomdp_py.problems.tag.example_worlds import *
 from pomdp_py.problems.tag.models.observation_model import *

+
 #### Visualization through pygame ####
 class TagViz:
-
     def __init__(self, env, res=30, fps=30, controllable=False, observation_model=None):
         self._env = env

@@ -35,22 +36,24 @@ def __init__(self, env, res=30, fps=30, controllable=False, observation_model=No
     def _make_gridworld_image(self, r):
         # Preparing 2d array
         w, l = self._env.width, self._env.length
-        arr2d = np.full((self._env.width,
-                         self._env.length), 0) # free grids
+        arr2d = np.full((self._env.width, self._env.length), 0)  # free grids
         # Creating image
-        img = np.full((w*r,l*r,3), 255, dtype=np.int32)
+        img = np.full((w * r, l * r, 3), 255, dtype=np.int32)
         for x in range(w):
             for y in range(l):
-                if (x,y) not in self._env.grid_map.obstacle_poses:
-                    arr2d[x,y] == 0 # free
-                    cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                                  (255, 255, 255), -1)
+                if (x, y) not in self._env.grid_map.obstacle_poses:
+                    arr2d[x, y] = 0  # free (assignment; "==" was a no-op)
+                    cv2.rectangle(
+                        img, (y * r, x * r), (y * r + r, x * r + r), (255, 255, 255), -1
+                    )
                 else:
-                    arr2d[x,y] == 1 # obstacle
-                    cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                                  (40, 31, 3), -1)
-                cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                              (0, 0, 0), 1, 8)
+                    arr2d[x, y] = 1  # obstacle (assignment; "==" was a no-op)
+                    cv2.rectangle(
+                        img, (y * r, x * r), (y * r + r, x * r + r), (40, 31, 3), -1
+                    )
+                cv2.rectangle(
+                    img, (y * r, x * r), (y * r + r, x * r + r), (0, 0, 0), 1, 8
+                )
         return img

     @property
@@ -75,22 +78,22 @@ def update(self, action,
observation, belief): self._last_belief = belief @staticmethod - def draw_robot(img, x, y, th, size, color=(255,12,12)): + def draw_robot(img, x, y, th, size, color=(255, 12, 12)): radius = int(round(size / 2)) - cv2.circle(img, (y+radius, x+radius), radius, color, thickness=6) + cv2.circle(img, (y + radius, x + radius), radius, color, thickness=6) # endpoint = (y+radius + int(round(radius*math.sin(th))), # x+radius + int(round(radius*math.cos(th)))) # cv2.line(img, (y+radius,x+radius), endpoint, color, 2) @staticmethod - def draw_observation(img, z, rx, ry, rth, r, size, color=(12,12,255)): + def draw_observation(img, z, rx, ry, rth, r, size, color=(12, 12, 255)): assert type(z) == TagObservation, "%s != TagObservation" % (str(type(z))) radius = int(round(r / 2)) if z.target_position is not None: lx, ly = z.target_position - cv2.circle(img, (ly*r+radius, - lx*r+radius), size, color, thickness=-1) - + cv2.circle( + img, (ly * r + radius, lx * r + radius), size, color, thickness=-1 + ) # TODO! Deprecated. @staticmethod @@ -107,18 +110,23 @@ def draw_belief(img, belief, r, size, target_color): count = 0 for state in reversed(sorted(hist, key=hist.get)): if last_val != -1: - color = util.lighter(color, 1-hist[state]/last_val) + color = util.lighter(color, 1 - hist[state] / last_val) if np.mean(np.array(color) / np.array([255, 255, 255])) < 0.999: tx, ty = state.target_position - if (tx,ty) not in circle_drawn: - circle_drawn[(tx,ty)] = 0 - circle_drawn[(tx,ty)] += 1 - - cv2.circle(img, (ty*r+radius, - tx*r+radius), size//circle_drawn[(tx,ty)], color, thickness=-1) + if (tx, ty) not in circle_drawn: + circle_drawn[(tx, ty)] = 0 + circle_drawn[(tx, ty)] += 1 + + cv2.circle( + img, + (ty * r + radius, tx * r + radius), + size // circle_drawn[(tx, ty)], + color, + thickness=-1, + ) last_val = hist[state] - count +=1 + count += 1 if last_val <= 0: break @@ -127,14 +135,14 @@ def on_init(self): """pygame init""" pygame.init() # calls pygame.font.init() # init main screen and background - self._display_surf = pygame.display.set_mode((self.img_width, - self.img_height), - pygame.HWSURFACE) + self._display_surf = pygame.display.set_mode( + (self.img_width, self.img_height), pygame.HWSURFACE + ) self._background = pygame.Surface(self._display_surf.get_size()).convert() self._clock = pygame.time.Clock() # Font - self._myfont = pygame.font.SysFont('Comic Sans MS', 30) + self._myfont = pygame.font.SysFont("Comic Sans MS", 30) self._running = True def on_event(self, event): @@ -170,7 +178,12 @@ def on_event(self, event): print(" action: %s" % str(action.name)) print(" observation: %s" % str(z)) print(" reward: %s" % str(reward)) - print(" valid motions: %s" % str(self._env.grid_map.valid_motions(self._env.state.robot_position))) + print( + " valid motions: %s" + % str( + self._env.grid_map.valid_motions(self._env.state.robot_position) + ) + ) print("------------") if self._env.state.target_found: self._running = False @@ -186,10 +199,10 @@ def on_render(self): fps_text = "FPS: {0:.2f}".format(self._clock.get_fps()) last_action = self._last_action last_action_str = "no_action" if last_action is None else str(last_action) - pygame.display.set_caption("%s | Robot(%.2f,%.2f,%.2f) | %s | %s" % - (last_action_str, rx, ry, 0, - str(self._env.state.target_found), - fps_text)) + pygame.display.set_caption( + "%s | Robot(%.2f,%.2f,%.2f) | %s | %s" + % (last_action_str, rx, ry, 0, str(self._env.state.target_found), fps_text) + ) pygame.display.flip() def on_cleanup(self): @@ -199,7 +212,7 @@ def on_execute(self): if 
self.on_init() == False: self._running = False - while( self._running ): + while self._running: for event in pygame.event.get(): self.on_event(event) self.on_loop() @@ -212,8 +225,9 @@ def render_env(self, display_surf): # draw target tx, ty = self._env.state.target_position - cv2.rectangle(img, (ty*r, tx*r), (ty*r+r, tx*r+r), - (255, 165, 0), -1) + cv2.rectangle( + img, (ty * r, tx * r), (ty * r + r, tx * r + r), (255, 165, 0), -1 + ) # draw robot rx, ry = self._env.state.robot_position @@ -222,14 +236,16 @@ def render_env(self, display_surf): # last_viz_observation = self._last_viz_observation.get(robot_id, None) # last_belief = self._last_belief.get(robot_id, None) if self._last_belief is not None: - TagViz.draw_belief(img, self._last_belief, r, r//3, self._target_color) + TagViz.draw_belief(img, self._last_belief, r, r // 3, self._target_color) if self._last_observation is not None: - TagViz.draw_observation(img, self._last_observation, - rx, ry, 0, r, r//8, color=(20, 20, 180)) + TagViz.draw_observation( + img, self._last_observation, rx, ry, 0, r, r // 8, color=(20, 20, 180) + ) - TagViz.draw_robot(img, rx*r, ry*r, 0, r, color=(200, 12, 150)) + TagViz.draw_robot(img, rx * r, ry * r, 0, r, color=(200, 12, 150)) pygame.surfarray.blit_array(display_surf, img) + # TODO! DEPRECATED! def unittest(): worldmap, robot = world0 @@ -238,5 +254,6 @@ def unittest(): viz = TagViz(env, controllable=True, observation_model=observation_model) viz.on_execute() -if __name__ == '__main__': + +if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/tag/example_worlds.py b/pomdp_py/problems/tag/example_worlds.py index 3474abbf..374ad8d8 100644 --- a/pomdp_py/problems/tag/example_worlds.py +++ b/pomdp_py/problems/tag/example_worlds.py @@ -1,18 +1,20 @@ world0 = ( -""" + """ xxxxx...xx xxxxx...xx xxxxx...xx -.....T.... +.....T.... r......... -""", "r" +""", + "r", ) world1 = ( -""" + """ x.Tx .... r... 
x..x -""" -,"r") +""", + "r", +) diff --git a/pomdp_py/problems/tag/experiment.py b/pomdp_py/problems/tag/experiment.py index 647ab5fd..c23bf1b3 100644 --- a/pomdp_py/problems/tag/experiment.py +++ b/pomdp_py/problems/tag/experiment.py @@ -3,34 +3,32 @@ from pomdp_py.problems.tag.problem import * import numpy as np + def trial(worldstr, **kwargs): grid_map = GridMap.from_str(worldstr) free_cells = grid_map.free_cells() init_robot_position = random.sample(free_cells, 1)[0] init_target_position = random.sample(free_cells, 1)[0] - problem = TagProblem(init_robot_position, - init_target_position, - grid_map, **kwargs) - discounted_reward = solve(problem, - max_depth=15, - discount_factor=0.95, - planning_time=.7, - exploration_const=10, - visualize=True, - max_time=120, - max_steps=500) + problem = TagProblem(init_robot_position, init_target_position, grid_map, **kwargs) + discounted_reward = solve( + problem, + max_depth=15, + discount_factor=0.95, + planning_time=0.7, + exploration_const=10, + visualize=True, + max_time=120, + max_steps=500, + ) return discounted_reward + def main(): all_rewards = [] try: for i in range(100): - dr = trial(world0[0], - pr_stay=0.5, - small=1, - big=10, - prior="uniform") + dr = trial(world0[0], pr_stay=0.5, small=1, big=10, prior="uniform") all_rewards.append(dr) finally: print("All done!") @@ -38,5 +36,6 @@ def main(): print("Average discounted reward: %.3f" % (np.mean(all_rewards))) print("Std.dev discounted reward: %.3f" % (np.std(all_rewards))) + if __name__ == "__main__": main() diff --git a/pomdp_py/problems/tag/models/__init__.py b/pomdp_py/problems/tag/models/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/models/__init__.py +++ b/pomdp_py/problems/tag/models/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/models/components/__init__.py b/pomdp_py/problems/tag/models/components/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/models/components/__init__.py +++ b/pomdp_py/problems/tag/models/components/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/models/components/grid_map.py b/pomdp_py/problems/tag/models/components/grid_map.py index 462c481a..8b057efa 100644 --- a/pomdp_py/problems/tag/models/components/grid_map.py +++ b/pomdp_py/problems/tag/models/components/grid_map.py @@ -2,8 +2,8 @@ from pomdp_py.problems.tag.models.transition_model import TagTransitionModel from pomdp_py.problems.multi_object_search.env.env import interpret -class GridMap: +class GridMap: def __init__(self, width, length, obstacle_poses): self.width = width self.length = length @@ -11,15 +11,18 @@ def __init__(self, width, length, obstacle_poses): self.obstacle_poses = obstacle_poses def valid_pose(self, position): - if not (position[0] >= 0 and position[0] < self.width\ - and position[1] >= 0 and position[1] < self.length): + if not ( + position[0] >= 0 + and position[0] < self.width + and position[1] >= 0 + and position[1] < self.length + ): return False if position in self.obstacle_poses: return False return True - def valid_motions(self, position, - all_motions=MOTION_ACTIONS): + def valid_motions(self, position, all_motions=MOTION_ACTIONS): valid_motions = set({}) for motion_action in all_motions: if TagTransitionModel.if_move_by(self, position, motion_action) == position: @@ -38,8 +41,12 @@ def from_str(cls, worldstr, **kwargs): return grid_map def free_cells(self): - cells = set({(x,y) - for x in range(self.width) - for y in range(self.length) - if (x,y) not in self.obstacle_poses}) + cells = 
set( + { + (x, y) + for x in range(self.width) + for y in range(self.length) + if (x, y) not in self.obstacle_poses + } + ) return cells diff --git a/pomdp_py/problems/tag/models/components/motion_policy.py b/pomdp_py/problems/tag/models/components/motion_policy.py index 8900e423..bc20641f 100644 --- a/pomdp_py/problems/tag/models/components/motion_policy.py +++ b/pomdp_py/problems/tag/models/components/motion_policy.py @@ -6,43 +6,47 @@ class TagTargetMotionPolicy(pomdp_py.GenerativeDistribution): - def __init__(self, - grid_map, - pr_stay=0.2): # With 1.0 - pr_stay chance, the target moves away + def __init__( + self, grid_map, pr_stay=0.2 + ): # With 1.0 - pr_stay chance, the target moves away self._grid_map = grid_map self._pr_stay = pr_stay - def _compute_candidate_actions(self, - robot_position, - target_position, - valid_target_motion_actions): + def _compute_candidate_actions( + self, robot_position, target_position, valid_target_motion_actions + ): candidate_actions = set({}) cur_dist = euclidean_dist(robot_position, target_position) for action in valid_target_motion_actions: - next_target_position = TagTransitionModel.if_move_by(self._grid_map, - target_position, - action) + next_target_position = TagTransitionModel.if_move_by( + self._grid_map, target_position, action + ) next_dist = euclidean_dist(robot_position, next_target_position) if next_dist > cur_dist: candidate_actions.add(action) return candidate_actions - - def probability(self, next_target_position, - target_position, robot_position, - valid_target_motion_actions): + def probability( + self, + next_target_position, + target_position, + robot_position, + valid_target_motion_actions, + ): # If it is impossible to go from target position to the next, # then it is a zero probability event. diff_x = abs(next_target_position[0] - target_position[0]) diff_y = abs(next_target_position[1] - target_position[1]) - if not ((diff_x == 1 and diff_y == 0) - or (diff_x == 0 and diff_y == 1) - or (diff_x == 0 and diff_y == 0)): + if not ( + (diff_x == 1 and diff_y == 0) + or (diff_x == 0 and diff_y == 1) + or (diff_x == 0 and diff_y == 0) + ): return constants.EPSILON - candidate_actions = self._compute_candidate_actions(robot_position, - target_position, - valid_target_motion_actions) + candidate_actions = self._compute_candidate_actions( + robot_position, target_position, valid_target_motion_actions + ) if len(candidate_actions) == 0: # No action possible, yet next_target_position is a valid # transition from current. @@ -61,30 +65,33 @@ def probability(self, next_target_position, else: # The object has taken an adversarial action. 
for action in candidate_actions: - if (target_position[0] + action.motion[0], - target_position[1] + action.motion[1]) == next_target_position: + if ( + target_position[0] + action.motion[0], + target_position[1] + action.motion[1], + ) == next_target_position: return (1.0 - self._pr_stay) / len(candidate_actions) return constants.EPSILON - def random(self, robot_position, target_position, valid_target_motion_actions, - mpe=False): - if mpe or random.uniform(0,1) > self._pr_stay: + def random( + self, robot_position, target_position, valid_target_motion_actions, mpe=False + ): + if mpe or random.uniform(0, 1) > self._pr_stay: # Move away; Pick motion actions that makes the target moves away from the robot - candidate_actions = self._compute_candidate_actions(robot_position, - target_position, - valid_target_motion_actions) + candidate_actions = self._compute_candidate_actions( + robot_position, target_position, valid_target_motion_actions + ) if len(candidate_actions) == 0: return target_position chosen_action = random.sample(candidate_actions, 1)[0] - return TagTransitionModel.if_move_by(self._grid_map, - target_position, - chosen_action) + return TagTransitionModel.if_move_by( + self._grid_map, target_position, chosen_action + ) else: # stay return target_position def mpe(self, robot_position, target_position, valid_target_motion_actions): - return self.random(robot_position, target_position, - valid_target_motion_actions, - mpe=True) + return self.random( + robot_position, target_position, valid_target_motion_actions, mpe=True + ) diff --git a/pomdp_py/problems/tag/models/observation_model.py b/pomdp_py/problems/tag/models/observation_model.py index eda84bb6..7f5201a5 100644 --- a/pomdp_py/problems/tag/models/observation_model.py +++ b/pomdp_py/problems/tag/models/observation_model.py @@ -2,6 +2,7 @@ from pomdp_py.problems.tag.domain.observation import * import pomdp_py.problems.tag.constants as constants + class TagObservationModel(pomdp_py.ObservationModel): """In this observation model, the robot deterministically observes the target location when it is in the same grid cell diff --git a/pomdp_py/problems/tag/models/policy_model.py b/pomdp_py/problems/tag/models/policy_model.py index fcc89a61..9bc7188e 100644 --- a/pomdp_py/problems/tag/models/policy_model.py +++ b/pomdp_py/problems/tag/models/policy_model.py @@ -3,6 +3,7 @@ from pomdp_py.problems.tag.domain.action import * from pomdp_py.problems.tag.models.transition_model import * + class TagPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, grid_map=None): self._grid_map = grid_map @@ -13,8 +14,9 @@ def sample(self, state, **kwargs): def get_all_actions(self, state=None, history=None): if state is not None: if self._grid_map is not None: - valid_motions = self._grid_map.valid_motions(state.robot_position, - all_motions=MOTION_ACTIONS) + valid_motions = self._grid_map.valid_motions( + state.robot_position, all_motions=MOTION_ACTIONS + ) return valid_motions | set({TagAction()}) return MOTION_ACTIONS | set({TagAction()}) diff --git a/pomdp_py/problems/tag/models/reward_model.py b/pomdp_py/problems/tag/models/reward_model.py index 36499fd9..ce84fbe0 100644 --- a/pomdp_py/problems/tag/models/reward_model.py +++ b/pomdp_py/problems/tag/models/reward_model.py @@ -1,13 +1,15 @@ import pomdp_py from pomdp_py.problems.tag.domain.action import * -class TagRewardModel(pomdp_py.RewardModel): +class TagRewardModel(pomdp_py.RewardModel): def __init__(self, small=1, big=10): self.small = small self.big = big - def probability(self, reward, 
state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): if reward == self._reward_func(state, action): return 1.0 else: diff --git a/pomdp_py/problems/tag/models/transition_model.py b/pomdp_py/problems/tag/models/transition_model.py index 4a524f75..cb77f226 100644 --- a/pomdp_py/problems/tag/models/transition_model.py +++ b/pomdp_py/problems/tag/models/transition_model.py @@ -6,16 +6,15 @@ and with Pr=0.2, the target stays at the same place. The target never moves closer to the robot. """ + import copy import pomdp_py import pomdp_py.problems.tag.constants as constants from pomdp_py.problems.tag.domain.action import * -class TagTransitionModel(pomdp_py.TransitionModel): - def __init__(self, - grid_map, - target_motion_policy): +class TagTransitionModel(pomdp_py.TransitionModel): + def __init__(self, grid_map, target_motion_policy): self._grid_map = grid_map self.target_motion_policy = target_motion_policy @@ -23,17 +22,16 @@ def __init__(self, def if_move_by(cls, grid_map, position, action): if isinstance(action, MotionAction): dx, dy = action.motion - next_position = (position[0] + dx, - position[1] + dy) + next_position = (position[0] + dx, position[1] + dy) if grid_map.valid_pose(next_position): return next_position return position def probability(self, next_state, state, action, **kwargs): # Robot motion - expected_robot_position = TagTransitionModel.if_move_by(self._grid_map, - state.robot_position, - action) + expected_robot_position = TagTransitionModel.if_move_by( + self._grid_map, state.robot_position, action + ) if expected_robot_position != next_state.robot_position: return constants.EPSILON @@ -50,18 +48,22 @@ def probability(self, next_state, state, action, **kwargs): return 1.0 - constants.EPSILON # Target motion - valid_target_motion_actions = self._grid_map.valid_motions(state.target_position) - return self.target_motion_policy.probability(next_state.target_position, - state.target_position, - state.robot_position, - valid_target_motion_actions) + valid_target_motion_actions = self._grid_map.valid_motions( + state.target_position + ) + return self.target_motion_policy.probability( + next_state.target_position, + state.target_position, + state.robot_position, + valid_target_motion_actions, + ) def sample(self, state, action, argmax=False): # Robot motion next_state = copy.deepcopy(state) - next_state.robot_position = TagTransitionModel.if_move_by(self._grid_map, - state.robot_position, - action) + next_state.robot_position = TagTransitionModel.if_move_by( + self._grid_map, state.robot_position, action + ) # If Tag action if isinstance(action, TagAction): @@ -71,15 +73,17 @@ def sample(self, state, action, argmax=False): return next_state # Target motion - valid_target_motion_actions = self._grid_map.valid_motions(state.target_position) + valid_target_motion_actions = self._grid_map.valid_motions( + state.target_position + ) if not argmax: - next_state.target_position = self.target_motion_policy.random(state.robot_position, - state.target_position, - valid_target_motion_actions) + next_state.target_position = self.target_motion_policy.random( + state.robot_position, state.target_position, valid_target_motion_actions + ) else: - next_state.target_position = self.target_motion_policy.mpe(state.robot_position, - state.target_position, - valid_target_motion_actions) + next_state.target_position = self.target_motion_policy.mpe( + state.robot_position, state.target_position, 
valid_target_motion_actions
+            )
         return next_state

     def argmax(self, state, action, **kwargs):
diff --git a/pomdp_py/problems/tag/problem.py b/pomdp_py/problems/tag/problem.py
index d5a9df14..a158af64 100644
--- a/pomdp_py/problems/tag/problem.py
+++ b/pomdp_py/problems/tag/problem.py
@@ -6,26 +6,22 @@
 from pomdp_py.problems.tag.example_worlds import *
 import time

-class TagProblem(pomdp_py.POMDP):
-
-    def __init__(self,
-                 init_robot_position,
-                 init_target_position,
-                 grid_map,
-                 pr_stay=0.2,
-                 small=1,
-                 big=10,
-                 prior="uniform",
-                 belief_type="hist",
-                 num_particles=6):
-        init_state = TagState(init_robot_position,
-                              init_target_position,
-                              False)
-        env = TagEnvironment(init_state,
-                             grid_map,
-                             pr_stay=pr_stay,
-                             small=1,
-                             big=10)
+class TagProblem(pomdp_py.POMDP):
+    def __init__(
+        self,
+        init_robot_position,
+        init_target_position,
+        grid_map,
+        pr_stay=0.2,
+        small=1,
+        big=10,
+        prior="uniform",
+        belief_type="hist",
+        num_particles=6,
+    ):
+        init_state = TagState(init_robot_position, init_target_position, False)
+        env = TagEnvironment(init_state, grid_map, pr_stay=pr_stay, small=small, big=big)
         if prior == "uniform":
             prior = {}
         elif prior == "informed":
@@ -34,47 +30,47 @@ def __init__(self,
             raise ValueError("Unrecognized prior type: %s" % prior)

         if belief_type == "particles":
-            init_belief = initialize_particles_belief(grid_map, init_robot_position,
-                                                      prior=prior, num_particles=num_particles)
+            init_belief = initialize_particles_belief(
+                grid_map, init_robot_position, prior=prior, num_particles=num_particles
+            )
         else:
             init_belief = initialize_belief(grid_map, init_robot_position, prior=prior)
-        agent = TagAgent(init_belief,
-                         grid_map,
-                         pr_stay=pr_stay,
-                         small=1,
-                         big=10)
-        super().__init__(agent, env,
-                         name="TagProblem")
-
-
-def solve(problem,
-          planner_type="pouct",
-          max_depth=10,  # planning horizon
-          discount_factor=0.99,
-          planning_time=1.,  # amount of time (s) to plan each step
-          exploration_const=1000,  # exploration constant
-          visualize=True,
-          max_time=120,  # maximum amount of time allowed to solve the problem
-          max_steps=500):  # maximum number of planning steps the agent can take.
+        agent = TagAgent(init_belief, grid_map, pr_stay=pr_stay, small=small, big=big)
+        super().__init__(agent, env, name="TagProblem")
+
+
+def solve(
+    problem,
+    planner_type="pouct",
+    max_depth=10,  # planning horizon
+    discount_factor=0.99,
+    planning_time=1.0,  # amount of time (s) to plan each step
+    exploration_const=1000,  # exploration constant
+    visualize=True,
+    max_time=120,  # maximum amount of time allowed to solve the problem
+    max_steps=500,
+):  # maximum number of planning steps the agent can take.
if planner_type == "pouct": - planner = pomdp_py.POUCT(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) + planner = pomdp_py.POUCT( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) else: - planner = pomdp_py.POMCP(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) + planner = pomdp_py.POMCP( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) if visualize: viz = TagViz(problem.env, controllable=False) if viz.on_init() == False: raise Exception("Environment failed to initialize") - viz.update(None, - None, - problem.agent.cur_belief) + viz.update(None, None, problem.agent.cur_belief) viz.on_render() _discount = 1.0 @@ -95,8 +91,9 @@ def solve(problem, # Receive observation _start = time.time() - real_observation = \ - problem.env.provide_observation(problem.agent.observation_model, real_action) + real_observation = problem.env.provide_observation( + problem.agent.observation_model, real_action + ) # Updates problem.agent.clear_history() # truncate history @@ -110,20 +107,18 @@ def solve(problem, _total_reward += reward _total_discounted_reward += reward * _discount _discount = _discount * discount_factor - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print("Action: %s" % str(real_action)) print("Observation: %s" % str(real_observation)) print("Reward: %s" % str(reward)) print("Reward (Cumulative): %s" % str(_total_reward)) print("Reward (Discounted): %s" % str(_total_discounted_reward)) - print("Find Actions Count: %d" % _find_actions_count) + print("Find Actions Count: %d" % _find_actions_count) if isinstance(planner, pomdp_py.POUCT): print("__num_sims__: %d" % planner.last_num_sims) if visualize: - viz.update(real_action, - real_observation, - problem.agent.cur_belief) + viz.update(real_action, real_observation, problem.agent.cur_belief) viz.on_loop() viz.on_render() @@ -134,7 +129,7 @@ def solve(problem, if _time_used > max_time: print("Maximum time reached.") break - if _discount*10 < 1e-4: + if _discount * 10 < 1e-4: print("Discount factor already too small") break @@ -148,23 +143,27 @@ def main(): init_robot_position = random.sample(free_cells, 1)[0] init_target_position = random.sample(free_cells, 1)[0] - problem = TagProblem(init_robot_position, - init_target_position, - grid_map, - pr_stay=0.2, - small=1, - big=10, - prior="uniform", - belief_type="histogram") - solve(problem, - max_depth=15, - discount_factor=0.95, - planning_time=0.8, - exploration_const=20, - visualize=True, - max_time=360, - max_steps=251, - planner_type="pouct") + problem = TagProblem( + init_robot_position, + init_target_position, + grid_map, + pr_stay=0.2, + small=1, + big=10, + prior="uniform", + belief_type="histogram", + ) + solve( + problem, + max_depth=15, + discount_factor=0.95, + planning_time=0.8, + exploration_const=20, + visualize=True, + max_time=360, + max_steps=251, + planner_type="pouct", + ) if __name__ == "__main__": diff --git a/pomdp_py/problems/tiger/tiger_problem.py b/pomdp_py/problems/tiger/tiger_problem.py index 039f8846..67a378ba 100644 --- 
a/pomdp_py/problems/tiger/tiger_problem.py +++ b/pomdp_py/problems/tiger/tiger_problem.py @@ -41,53 +41,70 @@ import sys import copy + class TigerState(pomdp_py.State): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerState): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerState(%s)" % self.name + def other(self): if self.name.endswith("left"): return TigerState("tiger-right") else: return TigerState("tiger-left") + class TigerAction(pomdp_py.Action): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerAction): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerAction(%s)" % self.name + class TigerObservation(pomdp_py.Observation): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerObservation): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerObservation(%s)" % self.name + # Observation model class ObservationModel(pomdp_py.ObservationModel): def __init__(self, noise=0.15): @@ -109,7 +126,7 @@ def sample(self, next_state, action): else: thresh = 0.5 - if random.uniform(0,1) < thresh: + if random.uniform(0, 1) < thresh: return TigerObservation(next_state.name) else: return TigerObservation(next_state.other().name) @@ -118,8 +135,8 @@ def get_all_observations(self): """Only need to implement this if you're using a solver that needs to enumerate over the observation space (e.g. value iteration)""" - return [TigerObservation(s) - for s in {"tiger-left", "tiger-right"}] + return [TigerObservation(s) for s in {"tiger-left", "tiger-right"}] + # Transition Model class TransitionModel(pomdp_py.TransitionModel): @@ -142,9 +159,11 @@ def sample(self, state, action): def get_all_states(self): """Only need to implement this if you're using - a solver that needs to enumerate over the observation space (e.g. value iteration)""" + a solver that needs to enumerate over the state space (e.g.
value iteration) + """ return [TigerState(s) for s in {"tiger-left", "tiger-right"}] + # Reward Model class RewardModel(pomdp_py.RewardModel): def _reward_func(self, state, action): @@ -158,19 +177,20 @@ def _reward_func(self, state, action): return 10 else: return -100 - else: # listen + else: # listen return -1 def sample(self, state, action, next_state): # deterministic return self._reward_func(state, action) + # Policy Model class PolicyModel(pomdp_py.RolloutPolicy): """A simple policy model with uniform prior over a - small, finite action space""" - ACTIONS = [TigerAction(s) - for s in {"open-left", "open-right", "listen"}] + small, finite action space""" + + ACTIONS = [TigerAction(s) for s in {"open-left", "open-right", "listen"}] def sample(self, state): return random.sample(self.get_all_actions(), 1)[0] @@ -192,14 +212,14 @@ class TigerProblem(pomdp_py.POMDP): def __init__(self, obs_noise, init_true_state, init_belief): """init_belief is a Distribution.""" - agent = pomdp_py.Agent(init_belief, - PolicyModel(), - TransitionModel(), - ObservationModel(obs_noise), - RewardModel()) - env = pomdp_py.Environment(init_true_state, - TransitionModel(), - RewardModel()) + agent = pomdp_py.Agent( + init_belief, + PolicyModel(), + TransitionModel(), + ObservationModel(obs_noise), + RewardModel(), + ) + env = pomdp_py.Environment(init_true_state, TransitionModel(), RewardModel()) super().__init__(agent, env, name="TigerProblem") @staticmethod @@ -214,20 +234,15 @@ def create(state="tiger-left", belief=0.5, obs_noise=0.15): model (default 0.15) """ init_true_state = TigerState(state) - init_belief = pomdp_py.Histogram({ - TigerState("tiger-left"): belief, - TigerState("tiger-right"): 1.0 - belief - }) - tiger_problem = TigerProblem(obs_noise, - init_true_state, init_belief) + init_belief = pomdp_py.Histogram( + {TigerState("tiger-left"): belief, TigerState("tiger-right"): 1.0 - belief} + ) + tiger_problem = TigerProblem(obs_noise, init_true_state, init_belief) tiger_problem.agent.set_belief(init_belief, prior=True) return tiger_problem - - -def test_planner(tiger_problem, planner, nsteps=3, - debug_tree=False): +def test_planner(tiger_problem, planner, nsteps=3, debug_tree=False): """ Runs the action-feedback loop of Tiger problem POMDP @@ -242,10 +257,8 @@ def test_planner(tiger_problem, planner, nsteps=3, action = planner.plan(tiger_problem.agent) if debug_tree: from pomdp_py.utils import TreeDebugger - dd = TreeDebugger(tiger_problem.agent.tree) - import pdb; pdb.set_trace() - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print(f"True state: {tiger_problem.env.state}") print(f"Belief: {tiger_problem.agent.cur_belief}") print(f"Action: {action}") @@ -260,7 +273,9 @@ def test_planner(tiger_problem, planner, nsteps=3, # in real world); In that case, you could skip # the state transition and re-estimate the state # (e.g. through the perception stack on the robot). - reward = tiger_problem.env.reward_model.sample(tiger_problem.env.state, action, None) + reward = tiger_problem.env.reward_model.sample( + tiger_problem.env.state, action, None + ) print("Reward:", reward) # Let's create some simulated real observation; @@ -274,7 +289,7 @@ def test_planner(tiger_problem, planner, nsteps=3, # reading). Note that tiger_problem.env.state stores the # environment state after action execution. 
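# For reference, the histogram belief update applied a few steps below by
# pomdp_py.update_histogram_belief is the standard discrete Bayes filter,
#     b'(s') ∝ O(o | s', a) * sum_s T(s' | s, a) * b(s),
# normalized over all s' (see update_histogram_belief in
# pomdp_py/representations/belief/histogram.py later in this diff).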
real_observation = TigerObservation(tiger_problem.env.state.name) - print(">> Observation:", real_observation) + print(">> Observation:", real_observation) tiger_problem.agent.update_history(action, real_observation) # Update the belief. If the planner is POMCP, planner.update @@ -284,13 +299,14 @@ def test_planner(tiger_problem, planner, nsteps=3, print("Num sims:", planner.last_num_sims) print("Plan time: %.5f" % planner.last_planning_time) - if isinstance(tiger_problem.agent.cur_belief, - pomdp_py.Histogram): + if isinstance(tiger_problem.agent.cur_belief, pomdp_py.Histogram): new_belief = pomdp_py.update_histogram_belief( tiger_problem.agent.cur_belief, - action, real_observation, + action, + real_observation, tiger_problem.agent.observation_model, - tiger_problem.agent.transition_model) + tiger_problem.agent.transition_model, + ) tiger_problem.agent.set_belief(new_belief) if action.name.startswith("open"): @@ -298,20 +314,28 @@ def test_planner(tiger_problem, planner, nsteps=3, # until every time door is opened. print("\n") + def make_tiger(noise=0.15, init_state="tiger-left", init_belief=[0.5, 0.5]): """Convenient function to quickly build a tiger domain. Useful for testing""" - tiger = TigerProblem(noise, TigerState(init_state), - pomdp_py.Histogram({TigerState("tiger-left"): init_belief[0], - TigerState("tiger-right"): init_belief[1]})) + tiger = TigerProblem( + noise, + TigerState(init_state), + pomdp_py.Histogram( + { + TigerState("tiger-left"): init_belief[0], + TigerState("tiger-right"): init_belief[1], + } + ), + ) return tiger def main(): - init_true_state = random.choice(["tiger-left", - "tiger-right"]) - init_belief = pomdp_py.Histogram({TigerState("tiger-left"): 0.5, - TigerState("tiger-right"): 0.5}) + init_true_state = random.choice(["tiger-left", "tiger-right"]) + init_belief = pomdp_py.Histogram( + {TigerState("tiger-left"): 0.5, TigerState("tiger-right"): 0.5} + ) tiger = make_tiger(init_state=init_true_state) init_belief = tiger.agent.belief @@ -320,10 +344,14 @@ def main(): test_planner(tiger, vi, nsteps=3) print("\n** Testing POUCT **") - pouct = pomdp_py.POUCT(max_depth=3, discount_factor=0.95, - num_sims=4096, exploration_const=50, - rollout_policy=tiger.agent.policy_model, - show_progress=True) + pouct = pomdp_py.POUCT( + max_depth=3, + discount_factor=0.95, + num_sims=4096, + exploration_const=50, + rollout_policy=tiger.agent.policy_model, + show_progress=True, + ) test_planner(tiger, pouct, nsteps=10) TreeDebugger(tiger.agent.tree).pp @@ -332,13 +360,21 @@ def main(): tiger.agent.tree = None print("** Testing POMCP **") - tiger.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief, num_particles=100), prior=True) - pomcp = pomdp_py.POMCP(max_depth=3, discount_factor=0.95, - num_sims=1000, exploration_const=50, - rollout_policy=tiger.agent.policy_model, - show_progress=True, pbar_update_interval=500) + tiger.agent.set_belief( + pomdp_py.Particles.from_histogram(init_belief, num_particles=100), prior=True + ) + pomcp = pomdp_py.POMCP( + max_depth=3, + discount_factor=0.95, + num_sims=1000, + exploration_const=50, + rollout_policy=tiger.agent.policy_model, + show_progress=True, + pbar_update_interval=500, + ) test_planner(tiger, pomcp, nsteps=10) TreeDebugger(tiger.agent.tree).pp -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/pomdp_py/representations/belief/histogram.py b/pomdp_py/representations/belief/histogram.py index 8caaf5a1..899416c2 100644 --- a/pomdp_py/representations/belief/histogram.py +++ 
b/pomdp_py/representations/belief/histogram.py @@ -2,7 +2,7 @@ def abstraction_over_histogram(current_histogram, state_mapper): - state_mappings = {s:state_mapper(s) for s in current_histogram} + state_mappings = {s: state_mapper(s) for s in current_histogram} hist = {} for s in current_histogram: a_s = state_mapper(s) @@ -11,11 +11,19 @@ def abstraction_over_histogram(current_histogram, state_mapper): hist[a_s] += current_histogram[s] return hist -def update_histogram_belief(current_histogram, - real_action, real_observation, - observation_model, transition_model, oargs={}, - targs={}, normalize=True, static_transition=False, - next_state_space=None): + +def update_histogram_belief( + current_histogram, + real_action, + real_observation, + observation_model, + transition_model, + oargs={}, + targs={}, + normalize=True, + static_transition=False, + next_state_space=None, +): """ update_histogram_belief(current_histogram, real_action, real_observation, observation_model, transition_model, oargs={}, @@ -54,20 +62,21 @@ def update_histogram_belief(current_histogram, if next_state_space is None: next_state_space = current_histogram for next_state in next_state_space: - observation_prob = observation_model.probability(real_observation, - next_state, - real_action, - **oargs) + observation_prob = observation_model.probability( + real_observation, next_state, real_action, **oargs + ) if not static_transition: transition_prob = 0 for state in current_histogram: - transition_prob += transition_model.probability(next_state, - state, - real_action, - **targs) * current_histogram[state] + transition_prob += ( + transition_model.probability( + next_state, state, real_action, **targs + ) + * current_histogram[state] + ) else: transition_prob = current_histogram[next_state] - + new_histogram[next_state] = observation_prob * transition_prob total_prob += new_histogram[next_state] diff --git a/pomdp_py/representations/belief/particles.pyx b/pomdp_py/representations/belief/particles.pyx index 7f71d46c..ecb066c6 100644 --- a/pomdp_py/representations/belief/particles.pyx +++ b/pomdp_py/representations/belief/particles.pyx @@ -30,9 +30,9 @@ cpdef particle_reinvigoration(Particles particles, if len(new_particles) > num_particles: return new_particles - + print("Particle reinvigoration for %d particles" % (num_particles - len(new_particles))) - cdef State next_state + cdef State next_state while len(new_particles) < num_particles: # need to make a copy otherwise the transform affects states in 'particles' next_state = copy.deepcopy(particles.random()) @@ -85,4 +85,3 @@ cpdef update_particles_belief(Particles current_particles, # Particle reinvigoration return particle_reinvigoration(Particles(filtered_particles), len(current_particles.particles), state_transform_func=state_transform_func) - diff --git a/pomdp_py/representations/distribution/gaussian.pyx b/pomdp_py/representations/distribution/gaussian.pyx index e1909d55..8fd18c0d 100644 --- a/pomdp_py/representations/distribution/gaussian.pyx +++ b/pomdp_py/representations/distribution/gaussian.pyx @@ -10,7 +10,7 @@ if scipy_spec is not None: else: raise ImportError("scipy not found."\ "Requires scipy.stats.multivariate_normal to use Gaussian") - + cdef class Gaussian(GenerativeDistribution): @@ -46,7 +46,7 @@ cdef class Gaussian(GenerativeDistribution): def __getitem__(self, value): """__getitem__(self, value) Evaluate the probability of given value - + Args: value (list or array like) Returns: @@ -54,8 +54,8 @@ cdef class Gaussian(GenerativeDistribution): 
return multivariate_normal.pdf(np.array(value), np.array(self._mean), np.array(self._cov)) - - + + def __setitem__(self, value, prob): """__setitem__(self, value, prob) Not Implemented; diff --git a/pomdp_py/utils/__init__.py b/pomdp_py/utils/__init__.py index 59d457f8..82e106de 100644 --- a/pomdp_py/utils/__init__.py +++ b/pomdp_py/utils/__init__.py @@ -1,10 +1,23 @@ # Assorted convenient functions -from pomdp_py.utils.math import (vec, proj, - R_x, R_y, R_z, R_between, T, - to_radians, approx_equal) -from pomdp_py.utils.misc import (remap, json_safe, safe_slice, similar, special_char) -from pomdp_py.utils.colors import (lighter, rgb_to_hex, hex_to_rgb, - inverse_color_rgb, inverse_color_hex, - random_unique_color) +from pomdp_py.utils.math import ( + vec, + proj, + R_x, + R_y, + R_z, + R_between, + T, + to_radians, + approx_equal, +) +from pomdp_py.utils.misc import remap, json_safe, safe_slice, similar, special_char +from pomdp_py.utils.colors import ( + lighter, + rgb_to_hex, + hex_to_rgb, + inverse_color_rgb, + inverse_color_hex, + random_unique_color, +) from pomdp_py.utils import typ from pomdp_py.utils.debugging import TreeDebugger diff --git a/pomdp_py/utils/colors.py b/pomdp_py/utils/colors.py index 54829c15..ef47d450 100644 --- a/pomdp_py/utils/colors.py +++ b/pomdp_py/utils/colors.py @@ -3,36 +3,42 @@ import numpy as np import random + # colors def lighter(color, percent): - '''assumes color is rgb between (0, 0, 0) and (255, 255, 255)''' + """assumes color is rgb between (0, 0, 0) and (255, 255, 255)""" color = np.array(color) white = np.array([255, 255, 255]) - vector = white-color + vector = white - color return color + vector * percent + def rgb_to_hex(rgb): - r,g,b = rgb - return '#%02x%02x%02x' % (int(r), int(g), int(b)) + r, g, b = rgb + return "#%02x%02x%02x" % (int(r), int(g), int(b)) + def hex_to_rgb(hx): """hx is a string, begins with #. ASSUME len(hx)=7.""" if len(hx) != 7: raise ValueError("Hex must be #------") hx = hx[1:] # omit the '#' - r = int('0x'+hx[:2], 16) - g = int('0x'+hx[2:4], 16) - b = int('0x'+hx[4:6], 16) - return (r,g,b) + r = int("0x" + hx[:2], 16) + g = int("0x" + hx[2:4], 16) + b = int("0x" + hx[4:6], 16) + return (r, g, b) + def inverse_color_rgb(rgb): - r,g,b = rgb - return (255-r, 255-g, 255-b) + r, g, b = rgb + return (255 - r, 255 - g, 255 - b) + def inverse_color_hex(hx): """hx is a string, begins with #. ASSUME len(hx)=7.""" return inverse_color_rgb(hex_to_rgb(hx)) + def random_unique_color(colors, ctype=1): """ ctype=1: completely random diff --git a/pomdp_py/utils/debugging.py b/pomdp_py/utils/debugging.py index a4df9b9d..3e79b2fc 100644 --- a/pomdp_py/utils/debugging.py +++ b/pomdp_py/utils/debugging.py @@ -87,6 +87,7 @@ To explore more features, browse the list of methods in the documentation. """ + import sys from pomdp_py.algorithms.po_uct import TreeNode, QNode, VNode, RootVNode from pomdp_py.utils import typ, similar, special_char @@ -95,6 +96,7 @@ DEFAULT_MARK_COLOR = "blue" MARKED = {} # tracks marked nodes on tree + def _node_pp(node, e=None, p=None, o=None): # We want to return the node, but we don't want to print it on pdb with # its default string. 
But instead, we want to print it with our own @@ -104,10 +106,10 @@ def _node_pp(node, e=None, p=None, o=None): else: return _QNodePP(node, parent_edge=e, parent=p, original=o) -class _NodePP: +class _NodePP: def __init__(self, node, parent_edge=None, parent=None, original=None): - """node: either VNode or QNode (the actual node on the tree) """ + """node: either VNode or QNode (the actual node on the tree)""" self.parent_edge = parent_edge self.parent = parent self.children = node.children @@ -137,8 +139,7 @@ def to_edge(self, key): edges = list(sorted_by_str(self.children.keys())) return edges[key] elif type(key) == str: - chosen = max(self.children.keys(), - key=lambda edge: similar(str(edge), key)) + chosen = max(self.children.keys(), key=lambda edge: similar(str(edge), key)) if similar(str(chosen), key) >= SIMILAR_THRESH: return chosen raise ValueError("Cannot access children with key {}".format(key)) @@ -186,17 +187,18 @@ def interpret_print_type(opt): def p(self, opt=None, **kwargs): if opt is None: max_depth = None - print_type_opt = kwargs.get('t', "summary") + print_type_opt = kwargs.get("t", "summary") elif type(opt) == int: max_depth = opt - print_type_opt = kwargs.get('t', "summary") + print_type_opt = kwargs.get("t", "summary") elif type(opt) == str: print_type_opt = opt - max_depth = kwargs.get('d', None) + max_depth = kwargs.get("d", None) else: raise ValueError("Cannot deal with opt of type {}".format(type(opt))) - self.print_tree(max_depth=max_depth, - print_type=_NodePP.interpret_print_type(print_type_opt)) + self.print_tree( + max_depth=max_depth, print_type=_NodePP.interpret_print_type(print_type_opt) + ) @property def pp(self): @@ -207,13 +209,15 @@ def print_tree(self, **options): _NodePP._print_tree_helper(self, 0, "", [None], -1, **options) @staticmethod - def _print_tree_helper(root, - depth, # depth of root - parent_edge, - branch_positions, # list of 'first', 'middle', 'last' for each level prior to root - child_index, # Index of the root as a child of parent - max_depth=None, - print_type="summary"): + def _print_tree_helper( + root, + depth, # depth of root + parent_edge, + branch_positions, # list of 'first', 'middle', 'last' for each level prior to root + child_index, # Index of the root as a child of parent + max_depth=None, + print_type="summary", + ): """ pos_among_children is either 'first', 'middle', or 'last' """ @@ -224,7 +228,9 @@ def _print_tree_helper(root, # Print the tree branches for all levels up to current root branches = "" - preceding_positions = branch_positions[:-1] # all positions except for current root + preceding_positions = branch_positions[ + :-1 + ] # all positions except for current root for pos in preceding_positions: if pos is None: continue @@ -243,22 +249,25 @@ def _print_tree_helper(root, root.print_children = False if child_index >= 0: - line = branches + str(child_index).translate(special_char.SUBSCRIPT) + str(root) + line = ( + branches + + str(child_index).translate(special_char.SUBSCRIPT) + + str(root) + ) else: line = branches + str(root) if isinstance(root, VNode): - line += typ.cyan("(depth="+str(depth)+")") + line += typ.cyan("(depth=" + str(depth) + ")") print(line) for i, c in enumerate(sorted_by_str(root.children)): - skip = True if root[c].marked: skip = False elif print_type == "complete": skip = False - elif (root[c].num_visits > 1): + elif root[c].num_visits > 1: skip = False if print_type == "marked-only" and not root[c].marked: skip = True @@ -276,36 +285,41 @@ def _print_tree_helper(root, else: next_pos = 
"middle" - _NodePP._print_tree_helper(root[c], - next_depth, - c, - branch_positions + [next_pos], - i, - max_depth=max_depth, - print_type=print_type) + _NodePP._print_tree_helper( + root[c], + next_depth, + c, + branch_positions + [next_pos], + i, + max_depth=max_depth, + print_type=print_type, + ) class _QNodePP(_NodePP, QNode): """QNode for better printing""" + def __init__(self, qnode, **kwargs): QNode.__init__(self, qnode.num_visits, qnode.value) _NodePP.__init__(self, qnode, **kwargs) def __str__(self): - return TreeDebugger.single_node_str(self, - parent_edge=self.parent_edge, - include_children=self.print_children) + return TreeDebugger.single_node_str( + self, parent_edge=self.parent_edge, include_children=self.print_children + ) + class _VNodePP(_NodePP, VNode): """VNode for better printing""" + def __init__(self, vnode, **kwargs): VNode.__init__(self, vnode.num_visits) _NodePP.__init__(self, vnode, **kwargs) def __str__(self): - return TreeDebugger.single_node_str(self, - parent_edge=self.parent_edge, - include_children=self.print_children) + return TreeDebugger.single_node_str( + self, parent_edge=self.parent_edge, include_children=self.print_children + ) class TreeDebugger: @@ -315,6 +329,7 @@ class TreeDebugger: QNodes (value represents Q(b,a); children are observations) and VNodes (value represents V(b); children are actions). """ + def __init__(self, tree): """ Args: @@ -323,18 +338,21 @@ def __init__(self, tree): which can be accessed by agent.tree. """ if not isinstance(tree, TreeNode): - raise ValueError("Expecting tree to be a TreeNode, but got {}".format(type(tree))) + raise ValueError( + "Expecting tree to be a TreeNode, but got {}".format(type(tree)) + ) self.tree = _node_pp(tree) - self.current = self.tree # points to the node the user is interacting with + self.current = self.tree # points to the node the user is interacting with self._stats_cache = {} def __str__(self): return str(self.current) def __repr__(self): - nodestr = TreeDebugger.single_node_str(self.current, - parent_edge=self.current.parent_edge) + nodestr = TreeDebugger.single_node_str( + self.current, parent_edge=self.current.parent_edge + ) return "TreeDebugger@\n{}".format(nodestr) def __getitem__(self, key): @@ -354,29 +372,31 @@ def _get_stats(self): self._stats_cache[id(self.current)] = stats return stats - def num_nodes(self, kind='all'): + def num_nodes(self, kind="all"): """ Returns the total number of nodes in the tree rooted at "current" """ stats = self._get_stats() res = { - 'all': stats['total_vnodes'] + stats['total_qnodes'], - 'q': stats['total_qnodes'], - 'v': stats['total_vnodes'] + "all": stats["total_vnodes"] + stats["total_qnodes"], + "q": stats["total_qnodes"], + "v": stats["total_vnodes"], } if kind in res: return res[kind] else: - raise ValueError("Invalid value for kind={}; Valid values are {}"\ - .format(kind, list(res.keys()))) - + raise ValueError( + "Invalid value for kind={}; Valid values are {}".format( + kind, list(res.keys()) + ) + ) @property def depth(self): """Tree depth starts from 0 (root node only). 
It is the largest number of edges on a path from root to leaf.""" stats = self._get_stats() - return stats['max_depth'] + return stats["max_depth"] @property def d(self): @@ -397,17 +417,17 @@ def nl(self): @property def nn(self): """Returns the total number of nodes in the tree""" - return self.num_nodes(kind='all') + return self.num_nodes(kind="all") @property def nq(self): """Returns the total number of QNodes in the tree""" - return self.num_nodes(kind='q') + return self.num_nodes(kind="q") @property def nv(self): """Returns the total number of VNodes in the tree""" - return self.num_nodes(kind='v') + return self.num_nodes(kind="v") def l(self, depth, as_debuggers=True): """alias for layer""" @@ -424,12 +444,16 @@ def layer(self, depth, as_debuggers=True): one for each tree on the layer. """ if depth < 0 or depth > self.depth: - raise ValueError("Depth {} is out of range (0-{})".format(depth, self.depth)) + raise ValueError( + "Depth {} is out of range (0-{})".format(depth, self.depth) + ) nodes = [] self._layer_helper(self.current, 0, depth, nodes) return nodes - def _layer_helper(self, root, current_depth, target_depth, nodes, as_debuggers=True): + def _layer_helper( + self, root, current_depth, target_depth, nodes, as_debuggers=True + ): if current_depth == target_depth: if isinstance(root, VNode): if as_debuggers: @@ -442,10 +466,7 @@ def _layer_helper(self, root, current_depth, target_depth, nodes, as_debuggers=T next_depth = current_depth else: next_depth = current_depth + 1 - self._layer_helper(root[c], - next_depth, - target_depth, - nodes) + self._layer_helper(root[c], next_depth, target_depth, nodes) @property def leaf(self): @@ -470,7 +491,6 @@ def step(self, key): self.current = self[edge] print("step: " + str(edge)) - def s(self, key): """alias for step""" return self.step(key) @@ -587,9 +607,9 @@ def single_node_str(node, parent_edge=None, indent=1, include_children=True): if parent_edge is not None: output += opposite_color(str(parent_edge)) + "⟶" - output += color(str(node.__class__.__name__))\ - + "(n={}, v={:.3f})".format(node.num_visits, - node.value) + output += color(str(node.__class__.__name__)) + "(n={}, v={:.3f})".format( + node.num_visits, node.value + ) if include_children: output += "\n" for i, action in enumerate(sorted_by_str(node.children)): @@ -597,9 +617,9 @@ def single_node_str(node, parent_edge=None, indent=1, include_children=True): child_info = TreeDebugger.single_node_str(child, include_children=False) spaces = " " * indent - output += "{}- [{}] {}: {}".format(spaces, i, - typ.white(str(action)), - child_info) + output += "{}- [{}] {}: {}".format( + spaces, i, typ.white(str(action)), child_info + ) if i < len(node.children) - 1: output += "\n" output += "\n" @@ -632,7 +652,7 @@ def _preferred_actions_helper(root, depth, seq, max_depth=None): equally_good = [] if isinstance(root, VNode): for c in root.children: - if not(c == best_child) and root[c].value == best_value: + if not (c == best_child) and root[c].value == best_value: equally_good.append(c) if best_child is not None and root[best_child] is not None: @@ -642,8 +662,9 @@ def _preferred_actions_helper(root, depth, seq, max_depth=None): else: next_depth = depth + 1 - TreeDebugger._preferred_actions_helper(root[best_child], next_depth, seq, - max_depth=max_depth) + TreeDebugger._preferred_actions_helper( + root[best_child], next_depth, seq, max_depth=max_depth + ) def path(self, dest): """alias for path_to; @@ -691,17 +712,17 @@ def _get_path(self, start, dest, parent): def tree_stats(root, 
max_depth=None): """Gather statistics about the tree""" stats = { - 'total_vnodes': 0, - 'total_qnodes': 0, - 'total_vnodes_children': 0, - 'total_qnodes_children': 0, - 'max_vnodes_children': 0, - 'max_qnodes_children': 0, - 'max_depth': 0 + "total_vnodes": 0, + "total_qnodes": 0, + "total_vnodes_children": 0, + "total_qnodes_children": 0, + "max_vnodes_children": 0, + "max_qnodes_children": 0, + "max_depth": 0, } TreeDebugger._tree_stats_helper(root, 0, stats, max_depth=max_depth) - stats['num_visits'] = root.num_visits - stats['value'] = root.value + stats["num_visits"] = root.num_visits + stats["value"] = root.value return stats @staticmethod @@ -710,28 +731,37 @@ def _tree_stats_helper(root, depth, stats, max_depth=None): return else: if isinstance(root, VNode): - stats['total_vnodes'] += 1 - stats['total_vnodes_children'] += len(root.children) - stats['max_vnodes_children'] = max(stats['max_vnodes_children'], len(root.children)) - stats['max_depth'] = max(stats['max_depth'], depth) + stats["total_vnodes"] += 1 + stats["total_vnodes_children"] += len(root.children) + stats["max_vnodes_children"] = max( + stats["max_vnodes_children"], len(root.children) + ) + stats["max_depth"] = max(stats["max_depth"], depth) else: - stats['total_qnodes'] += 1 - stats['total_qnodes_children'] += len(root.children) - stats['max_qnodes_children'] = max(stats['max_qnodes_children'], len(root.children)) + stats["total_qnodes"] += 1 + stats["total_qnodes_children"] += len(root.children) + stats["max_qnodes_children"] = max( + stats["max_qnodes_children"], len(root.children) + ) for c in root.children: if isinstance(root[c], QNode): next_depth = depth else: next_depth = depth + 1 - TreeDebugger._tree_stats_helper(root[c], next_depth, stats, max_depth=max_depth) + TreeDebugger._tree_stats_helper( + root[c], next_depth, stats, max_depth=max_depth + ) + def sorted_by_str(enumerable): return sorted(enumerable, key=lambda n: str(n)) + def interpret_color(colorstr): if colorstr.lower() in typ.colors: return eval("typ.{}".format(colorstr)) else: - raise ValueError("Invalid color: {};\n" - "The available ones are {}".format(colorstr, typ.colors)) + raise ValueError( + "Invalid color: {};\nThe available ones are {}".format(colorstr, typ.colors) + ) diff --git a/pomdp_py/utils/interfaces/conversion.py b/pomdp_py/utils/interfaces/conversion.py index feece47f..b5fb7339 100644 --- a/pomdp_py/utils/interfaces/conversion.py +++ b/pomdp_py/utils/interfaces/conversion.py @@ -2,14 +2,15 @@ Provides utility to convert a POMDP written in pomdp_py to a POMDP file format (.pomdp or .pomdpx). Output to a file. """ + import subprocess import os import pomdp_py import numpy as np import xml.etree.ElementTree as ET -def to_pomdp_file(agent, output_path=None, - discount_factor=0.95, float_precision=9): + +def to_pomdp_file(agent, output_path=None, discount_factor=0.95, float_precision=9): """ Pass in an Agent, and use its components to generate a .pomdp file to `output_path`. @@ -42,30 +43,41 @@ all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise ValueError("S, A, O must be enumerable for a given agent to convert to .pomdp format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdp format" + ) content = f"discount: %.{float_precision}f\n" % discount_factor - content += "values: reward\n" # We only consider reward, not cost.
+ content += "values: reward\n" # We only consider reward, not cost. list_of_states = " ".join(str(s) for s in all_states) - assert len(list_of_states.split(" ")) == len(all_states),\ - "states must be convertable to strings without blank spaces" + assert len(list_of_states.split(" ")) == len( + all_states + ), "states must be convertable to strings without blank spaces" content += "states: %s\n" % list_of_states list_of_actions = " ".join(str(a) for a in all_actions) - assert len(list_of_actions.split(" ")) == len(all_actions),\ - "actions must be convertable to strings without blank spaces" + assert len(list_of_actions.split(" ")) == len( + all_actions + ), "actions must be convertable to strings without blank spaces" content += "actions: %s\n" % list_of_actions list_of_observations = " ".join(str(o) for o in all_observations) - assert len(list_of_observations.split(" ")) == len(all_observations),\ - "observations must be convertable to strings without blank spaces" + assert len(list_of_observations.split(" ")) == len( + all_observations + ), "observations must be convertable to strings without blank spaces" content += "observations: %s\n" % list_of_observations # Starting belief state - they need to be normalized total_belief = sum(agent.belief[s] for s in all_states) - content += "start: %s\n" % (" ".join([f"%.{float_precision}f" % (agent.belief[s]/total_belief) - for s in all_states])) + content += "start: %s\n" % ( + " ".join( + [ + f"%.{float_precision}f" % (agent.belief[s] / total_belief) + for s in all_states + ] + ) + ) # State transition probabilities - they need to be normalized for s in all_states: @@ -77,7 +89,12 @@ def to_pomdp_file(agent, output_path=None, total_prob = sum(probs) for i, s_next in enumerate(all_states): prob_norm = probs[i] / total_prob - content += f'T : %s : %s : %s %.{float_precision}f\n' % (a, s, s_next, prob_norm) + content += f"T : %s : %s : %s %.{float_precision}f\n" % ( + a, + s, + s_next, + prob_norm, + ) # Observation probabilities - they need to be normalized for s_next in all_states: @@ -87,12 +104,17 @@ def to_pomdp_file(agent, output_path=None, prob = agent.observation_model.probability(o, s_next, a) probs.append(prob) total_prob = sum(probs) - assert total_prob > 0.0,\ - "No observation is probable under state={} action={}"\ - .format(s_next, a) + assert ( + total_prob > 0.0 + ), "No observation is probable under state={} action={}".format(s_next, a) for i, o in enumerate(all_observations): prob_norm = probs[i] / total_prob - content += f'O : %s : %s : %s %.{float_precision}f\n' % (a, s_next, o, prob_norm) + content += f"O : %s : %s : %s %.{float_precision}f\n" % ( + a, + s_next, + o, + prob_norm, + ) # Immediate rewards for s in all_states: @@ -100,7 +122,12 @@ def to_pomdp_file(agent, output_path=None, for s_next in all_states: # We will take the argmax reward, which works for deterministic rewards. r = agent.reward_model.sample(s, a, s_next) - content += f'R : %s : %s : %s : * %.{float_precision}f\n' % (a, s, s_next, r) + content += f"R : %s : %s : %s : * %.{float_precision}f\n" % ( + a, + s, + s_next, + r, + ) if output_path is not None: with open(output_path, "w") as f: @@ -108,10 +135,7 @@ def to_pomdp_file(agent, output_path=None, return all_states, all_actions, all_observations - -def to_pomdpx_file(agent, pomdpconvert_path, - output_path=None, - discount_factor=0.95): +def to_pomdpx_file(agent, pomdpconvert_path, output_path=None, discount_factor=0.95): """ Converts an agent to a pomdpx file. 
This works by first converting the agent into a .pomdp file and then using the :code:`pomdpconvert` utility program to convert that file to a .pomdpx file. Check out @@ -133,15 +157,14 @@ def to_pomdpx_file(agent, pomdpconvert_path, discount_factor (float): The discount factor """ pomdp_path = "./temp-pomdp.pomdp" - to_pomdp_file(agent, pomdp_path, - discount_factor=discount_factor) + to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) proc = subprocess.Popen([pomdpconvert_path, pomdp_path]) proc.wait() pomdpx_path = pomdp_path + "x" assert os.path.exists(pomdpx_path), "POMDPx conversion failed." - with open(pomdpx_path, 'r') as f: + with open(pomdpx_path, "r") as f: content = f.read() if output_path is not None: @@ -175,7 +198,7 @@ def parse_pomdp_solve_output(alpha_path, pg_path=None): policy_graph: a mapping from node number to (action_number, edges) """ alphas = [] # (alpha_vector, action_number) tuples - with open(alpha_path, 'r') as f: + with open(alpha_path, "r") as f: action_number = None alpha_vector = None mode = "action" @@ -198,19 +221,19 @@ def parse_pomdp_solve_output(alpha_path, pg_path=None): if pg_path is None: return alphas else: - with open(pg_path, 'r') as f: + with open(pg_path, "r") as f: for line in f: line = line.rstrip() if len(line) == 0: continue parts = list(map(int, line.split())) # Splits on whitespace - assert parts[0] not in policy_graph,\ + assert parts[0] not in policy_graph, ( "The node id %d already exists. Something wrong" % parts[0] + ) policy_graph[parts[0]] = (parts[1], parts[2:]) return alphas, policy_graph - class AlphaVectorPolicy(pomdp_py.Planner): """ An offline POMDP policy is specified by a collection @@ -227,6 +250,7 @@ class AlphaVectorPolicy(pomdp_py.Planner): This can be constructed using .policy file created by sarsop. """ + def __init__(self, alphas, states): """ Args: @@ -240,8 +264,7 @@ def __init__(self, alphas, states): def plan(self, agent): """Returns an action that is mapped by the agent belief, under this policy""" b = [agent.belief[s] for s in self.states] - _, action = max(self.alphas, - key=lambda va: np.dot(b, va[0])) + _, action = max(self.alphas, key=lambda va: np.dot(b, va[0])) return action def value(self, belief): @@ -251,13 +274,11 @@ def value(self, belief): :math:`V(b) = max_{a\in\Gamma} {a} \cdot b` """ b = [belief[s] for s in self.states] - alpha_vector, _ = max(self.alphas, - key=lambda va: np.dot(b, va[0])) + alpha_vector, _ = max(self.alphas, key=lambda va: np.dot(b, va[0])) return np.dot(b, alpha_vector) @classmethod - def construct(cls, policy_path, - states, actions, solver="pomdpsol"): + def construct(cls, policy_path, states, actions, solver="pomdpsol"): """ Returns an AlphaVectorPolicy, given `alphas`, which are the output of parse_appl_policy_file. 
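As a minimal standalone sketch of the alpha-vector maximization that plan() and value() above perform (the vectors, belief, and action names here are made-up illustrative numbers, not solver output):

import numpy as np

# Hypothetical (alpha_vector, action) pairs; one alpha value per state.
alphas = [
    ((-10.0, 30.0), "open-left"),
    ((30.0, -10.0), "open-right"),
    ((12.0, 12.0), "listen"),
]
b = [0.5, 0.5]  # belief over the two states

# plan() returns the action of the maximizing alpha vector;
# value() returns the maximum b . alpha itself.
alpha, action = max(alphas, key=lambda va: np.dot(b, va[0]))
print(action, np.dot(b, alpha))  # -> listen 12.0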
@@ -279,35 +300,43 @@ action = actions[int(vector.attrib["action"])] alpha_vector = tuple(map(float, vector.text.split())) alphas.append((alpha_vector, action)) - return AlphaVectorPolicy(alphas, - states) + return AlphaVectorPolicy(alphas, states) @classmethod - def construct_from_pomdp_solve(cls, alpha_path, - states, actions): + def construct_from_pomdp_solve(cls, alpha_path, states, actions): alphas_with_action_numbers = parse_pomdp_solve_output(alpha_path) - alphas = [(alpha_vector, actions[action_number]) - for alpha_vector, action_number in alphas_with_action_numbers] + alphas = [ + (alpha_vector, actions[action_number]) + for alpha_vector, action_number in alphas_with_action_numbers + ] return AlphaVectorPolicy(alphas, states) class PGNode: """A node on the policy graph""" + def __init__(self, node_id, alpha_vector, action): self.node_id = node_id self.alpha_vector = alpha_vector self.action = action + def __eq__(self, other): if isinstance(other, PGNode): return self.node_id == other.node_id return False + def __hash__(self): return hash(self.node_id) + def __str__(self): return repr(self) + def __repr__(self): - return "NodeID(%d)::AlphaVector(%s)::Action(%s)\n"\ - % (self.node_id, str(self.alpha_vector), self.action) + return "NodeID(%d)::AlphaVector(%s)::Action(%s)\n" % ( + self.node_id, + str(self.alpha_vector), + self.action, + ) class PolicyGraph(pomdp_py.Planner): @@ -323,14 +352,13 @@ def __init__(self, nodes, edges, states): edges (dict): Mapping from node_id to a dictionary {observation -> node_id} states (list): List of states, ordered as in .pomdp file """ - self.nodes = {n.node_id:n for n in nodes} + self.nodes = {n.node_id: n for n in nodes} self.edges = edges self.states = states self._current_node = None @classmethod - def construct(cls, alpha_path, pg_path, - states, actions, observations): + def construct(cls, alpha_path, pg_path, states, actions, observations): """ See parse_pomdp_solve_output for detailed definitions of alphas and pg. @@ -356,8 +384,9 @@ assert 0 <= node_id < len(nodes), "Invalid node id in policy graph" action_number, o_links = pg[node_id] - assert actions[action_number] == nodes[node_id].action,\ - "Inconsistent action mapping" + assert ( + actions[action_number] == nodes[node_id].action + ), "Inconsistent action mapping" edges[node_id] = {} @@ -374,10 +403,9 @@ def plan(self, agent): def _find_node(self, agent): """Locate the node in the policy graph corresponding to the agent's current - belief state. """ + belief state.""" b = [agent.belief[s] for s in self.states] - nid = max(self.nodes, - key=lambda nid: np.dot(b, self.nodes[nid].alpha_vector)) + nid = max(self.nodes, key=lambda nid: np.dot(b, self.nodes[nid].alpha_vector)) return self.nodes[nid] def update(self, agent, action, observation): @@ -389,4 +417,6 @@ # Find out the node number using agent current belief self._current_node = self._find_node(agent) # Transition the current node following the graph - self._current_node = self.nodes[self.edges[self._current_node.node_id][observation]] + self._current_node = self.nodes[ + self.edges[self._current_node.node_id][observation] + ] diff --git a/pomdp_py/utils/interfaces/simple_rl.py b/pomdp_py/utils/interfaces/simple_rl.py index ba627fcb..7df1d273 100644 --- a/pomdp_py/utils/interfaces/simple_rl.py +++ b/pomdp_py/utils/interfaces/simple_rl.py @@ -15,8 +15,10 @@ lacking.
Providing this interface is mostly to potentially leverage the MDP algorithms in simple_rl. """ + import simple_rl + def convert_to_MDPClass(pomdp, discount_factor=0.99, step_cost=0): """Converts the pomdp to the building block MDPClass of simple_rl. There are a lot of variants of MDPClass in simple_rl. It is up to the user to then convert this @@ -39,16 +41,18 @@ # that simple_rl is supposed to work with. state = simple_rl.State(data=env.state) - return simple_rl.MDP(all_actions, - agent.transition_model.sample, - agent.reward_model.sample, - gamma=discount_factor, - step_cost=step_cost) + return simple_rl.MDP( + all_actions, + agent.transition_model.sample, + agent.reward_model.sample, + gamma=discount_factor, + step_cost=step_cost, + ) -def convert_to_POMDPClass(pomdp, - discount_factor=0.99, step_cost=0, - belief_updater_type="discrete"): +def convert_to_POMDPClass( + pomdp, discount_factor=0.99, step_cost=0, belief_updater_type="discrete" +): agent = pomdp.agent env = pomdp.env try: @@ -63,15 +67,19 @@ try: belief_hist = agent.belief.get_histogram() except Exception: - raise ValueError("Agent belief cannot be converted into a histogram;\n" - "thus cannot create POMDPClass.") + raise ValueError( + "Agent belief cannot be converted into a histogram;\n" + "thus cannot create POMDPClass." + ) - return simple_rl.POMDP(all_actions, - all_observations, - agent.transition_model.sample, - agent.reward_model.sample, - agent.observation_model.sample, - belief_hist, - belief_updater_type=belief_updater_type, - gamma=discount_factor, - step_cost=step_cost) + return simple_rl.POMDP( + all_actions, + all_observations, + agent.transition_model.sample, + agent.reward_model.sample, + agent.observation_model.sample, + belief_hist, + belief_updater_type=belief_updater_type, + gamma=discount_factor, + step_cost=step_cost, + ) diff --git a/pomdp_py/utils/interfaces/solvers.py b/pomdp_py/utils/interfaces/solvers.py index f05632bf..87b9eddc 100644 --- a/pomdp_py/utils/interfaces/solvers.py +++ b/pomdp_py/utils/interfaces/solvers.py @@ -10,18 +10,27 @@ * `POMDP.jl `_ * more? Help us if you can! """ + import pomdp_py -from pomdp_py.utils.interfaces.conversion\ - import to_pomdp_file, PolicyGraph, AlphaVectorPolicy, parse_pomdp_solve_output +from pomdp_py.utils.interfaces.conversion import ( + to_pomdp_file, + PolicyGraph, + AlphaVectorPolicy, + parse_pomdp_solve_output, +) import subprocess import os, sys -def vi_pruning(agent, pomdp_solve_path, - discount_factor=0.95, - options=[], - pomdp_name="temp-pomdp", - remove_generated_files=False, - return_policy_graph=False): + +def vi_pruning( + agent, + pomdp_solve_path, + discount_factor=0.95, + options=[], + pomdp_name="temp-pomdp", + remove_generated_files=False, + return_policy_graph=False, +): """ Value Iteration with pruning, using the software pomdp-solve https://www.pomdp.org/code/ developed by Anthony R. Cassandra.
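A usage sketch of this wrapper, assuming a locally built pomdp-solve binary (the "./pomdp-solve" path is a placeholder, and the keyword arguments are those of the signature above):

import pomdp_py
from pomdp_py.problems.tiger import make_tiger

tiger = make_tiger(noise=0.15)
# "./pomdp-solve" is a placeholder path to the compiled pomdp-solve binary.
policy = pomdp_py.vi_pruning(
    tiger.agent,
    "./pomdp-solve",
    discount_factor=0.95,
    pomdp_name="temp-pomdp",
    remove_generated_files=True,
)
print(policy.plan(tiger.agent))  # an action, e.g. listen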
@@ -49,24 +58,29 @@ all_states = list(agent.all_states) all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise("S, A, O must be enumerable for a given agent to convert to .pomdp format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdp format" + ) pomdp_path = "./%s.pomdp" % pomdp_name to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) - proc = subprocess.Popen([pomdp_solve_path, - "-pomdp", pomdp_path, - "-o", pomdp_name] + list(map(str,options))) + proc = subprocess.Popen( + [pomdp_solve_path, "-pomdp", pomdp_path, "-o", pomdp_name] + + list(map(str, options)) + ) proc.wait() # Read the value and policy graph files alpha_path = "%s.alpha" % pomdp_name pg_path = "%s.pg" % pomdp_name if return_policy_graph: - policy = PolicyGraph.construct(alpha_path, pg_path, - all_states, all_actions, all_observations) + policy = PolicyGraph.construct( + alpha_path, pg_path, all_states, all_actions, all_observations + ) else: policy = AlphaVectorPolicy.construct( - alpha_path, all_states, all_actions, solver="pomdp-solve") + alpha_path, all_states, all_actions, solver="pomdp-solve" + ) # Remove temporary files if remove_generated_files: @@ -76,13 +90,17 @@ return policy -def sarsop(agent, pomdpsol_path, - discount_factor=0.95, - timeout=30, memory=100, - precision=0.5, - pomdp_name="temp-pomdp", - remove_generated_files=False, - logfile=None): +def sarsop( + agent, + pomdpsol_path, + discount_factor=0.95, + timeout=30, + memory=100, + precision=0.5, + pomdp_name="temp-pomdp", + remove_generated_files=False, + logfile=None, +): """ SARSOP, using the binary from https://github.com/AdaCompNUS/sarsop This is an anytime POMDP planning algorithm @@ -104,7 +122,9 @@ all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise("S, A, O must be enumerable for a given agent to convert to .pomdpx format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdpx format" + ) if logfile is None: stdout = None @@ -116,12 +136,22 @@ pomdp_path = "./%s.pomdp" % pomdp_name to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) - proc = subprocess.Popen([pomdpsol_path, - "--timeout", str(timeout), - "--memory", str(memory), - "--precision", str(precision), - "--output", "%s.policy" % pomdp_name, - pomdp_path], stdout=stdout, stderr=stderr) + proc = subprocess.Popen( + [ + pomdpsol_path, + "--timeout", + str(timeout), + "--memory", + str(memory), + "--precision", + str(precision), + "--output", + "%s.policy" % pomdp_name, + pomdp_path, + ], + stdout=stdout, + stderr=stderr, + ) if logfile is not None: for line in proc.stdout: line = line.decode("utf-8") @@ -130,8 +160,7 @@ proc.wait() policy_path = "%s.policy" % pomdp_name - policy = AlphaVectorPolicy.construct(policy_path, - all_states, all_actions) + policy = AlphaVectorPolicy.construct(policy_path, all_states, all_actions) # Remove temporary files if remove_generated_files: diff --git a/pomdp_py/utils/math.py b/pomdp_py/utils/math.py index 763941d9..2da296c5 100644 --- a/pomdp_py/utils/math.py +++ b/pomdp_py/utils/math.py @@ -1,16 +1,19 @@ """Assorted utilities for math""" + import math import numpy as np + # Math def vec(p1, p2): - """ vector from p1 to p2 """ + """vector from p1 to p2""" if type(p1) != np.ndarray: p1 =
np.array(p1) if type(p2) != np.ndarray: p2 = np.array(p2) return p2 - p1 + def proj(vec1, vec2, scalar=False): # Project vec1 onto vec2. Returns a vector in the direction of vec2. scale = np.dot(vec1, vec2) / np.linalg.norm(vec2) @@ -19,40 +22,83 @@ def proj(vec1, vec2, scalar=False): else: return vec2 * scale + def R_x(th): - return np.array([ - 1, 0, 0, 0, - 0, np.cos(th), -np.sin(th), 0, - 0, np.sin(th), np.cos(th), 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + 1, + 0, + 0, + 0, + 0, + np.cos(th), + -np.sin(th), + 0, + 0, + np.sin(th), + np.cos(th), + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def R_y(th): - return np.array([ - np.cos(th), 0, np.sin(th), 0, - 0, 1, 0, 0, - -np.sin(th), 0, np.cos(th), 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + np.cos(th), + 0, + np.sin(th), + 0, + 0, + 1, + 0, + 0, + -np.sin(th), + 0, + np.cos(th), + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def R_z(th): - return np.array([ - np.cos(th), -np.sin(th), 0, 0, - np.sin(th), np.cos(th), 0, 0, - 0, 0, 1, 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + np.cos(th), + -np.sin(th), + 0, + 0, + np.sin(th), + np.cos(th), + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def T(dx, dy, dz): - return np.array([ - 1, 0, 0, dx, - 0, 1, 0, dy, - 0, 0, 1, dz, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array([1, 0, 0, dx, 0, 1, 0, dy, 0, 0, 1, dz, 0, 0, 0, 1]).reshape(4, 4) + def to_radians(th): - return th*np.pi / 180 + return th * np.pi / 180 + def R_between(v1, v2): if len(v1) != 3 or len(v2) != 3: @@ -62,14 +108,11 @@ def R_between(v1, v2): s = np.linalg.norm(v) I = np.identity(3) - vX = np.array([ - 0, -v[2], v[1], - v[2], 0, -v[0], - -v[1], v[0], 0 - ]).reshape(3,3) - R = I + vX + np.matmul(vX,vX) * ((1-c)/(s**2)) + vX = np.array([0, -v[2], v[1], v[2], 0, -v[0], -v[1], v[0], 0]).reshape(3, 3) + R = I + vX + np.matmul(vX, vX) * ((1 - c) / (s**2)) return R + def approx_equal(v1, v2, epsilon=1e-6): if len(v1) != len(v2): return False @@ -78,5 +121,6 @@ def approx_equal(v1, v2, epsilon=1e-6): return False return True + def euclidean_dist(p1, p2): - return math.sqrt(sum([(a - b)** 2 for a, b in zip(p1, p2)])) + return math.sqrt(sum([(a - b) ** 2 for a, b in zip(p1, p2)])) diff --git a/pomdp_py/utils/misc.py b/pomdp_py/utils/misc.py index 172267f6..c6a798c3 100644 --- a/pomdp_py/utils/misc.py +++ b/pomdp_py/utils/misc.py @@ -1,6 +1,8 @@ """Misc Python utilities""" + from difflib import SequenceMatcher + def remap(oldvalue, oldmin, oldmax, newmin, newmax): if oldmax - oldmin == 0: print("Warning in remap: the old range has size 0") @@ -14,29 +16,30 @@ def json_safe(obj): elif isinstance(obj, (list, tuple)): return [json_safe(item) for item in obj] elif isinstance(obj, dict): - return {json_safe(key):json_safe(value) for key, value in obj.items()} + return {json_safe(key): json_safe(value) for key, value in obj.items()} else: return str(obj) return obj def safe_slice(arr, start, end): - true_start = max(0, min(len(arr)-1, start)) - true_end = max(0, min(len(arr)-1, end)) + true_start = max(0, min(len(arr) - 1, start)) + true_end = max(0, min(len(arr) - 1, end)) return arr[true_start:true_end] + def similar(a, b): # Reference: https://stackoverflow.com/questions/17388213/find-the-similarity-metric-between-two-strings return SequenceMatcher(None, a, b).ratio() class special_char: - left = '\u2190' - up = '\u2191' - right = '\u2192' - down = '\u2193' - longleft = '\u27F5' - longright = '\u27F6' + left = "\u2190" + up = "\u2191" + right = "\u2192" + down = 
"\u2193" + longleft = "\u27f5" + longright = "\u27f6" hline = "─" vline = "│" diff --git a/pomdp_py/utils/templates.py b/pomdp_py/utils/templates.py index 9d3d146d..e7cb874a 100644 --- a/pomdp_py/utils/templates.py +++ b/pomdp_py/utils/templates.py @@ -1,69 +1,93 @@ """ Some particular implementations of the interface for convenience """ + import pomdp_py import random + class SimpleState(pomdp_py.State): """A SimpleState is a state that stores one piece of hashable data and the equality of two states of this kind depends just on this data""" + def __init__(self, data): self.data = data + def __hash__(self): return hash(self.data) + def __eq__(self, other): if isinstance(other, SimpleState): return self.data == other.data return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return str(self.data) + def __repr__(self): return "SimpleState({})".format(self.data) + class SimpleAction(pomdp_py.Action): """A SimpleAction is an action defined by a string name""" + def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, SimpleAction): return self.name == other.name return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return self.name + def __repr__(self): return "SimpleAction({})".format(self.name) + class SimpleObservation(pomdp_py.Observation): """A SimpleObservation is an observation with a piece of hashable data that defines the equality.""" + def __init__(self, data): self.data = data + def __hash__(self): return hash(self.data) + def __eq__(self, other): if isinstance(other, SimpleObservation): return self.data == other.data return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return str(self.data) + def __repr__(self): return "SimpleObservation({})".format(self.data) + class DetTransitionModel(pomdp_py.TransitionModel): """A DetTransitionModel is a deterministic transition model. A probability of 1 - epsilon is given for correct transition, and epsilon is given for incorrect transition.""" + def __init__(self, epsilon=1e-12): self.epsilon = epsilon @@ -81,6 +105,7 @@ class DetObservationModel(pomdp_py.ObservationModel): """A DetTransitionModel is a deterministic transition model. 
A probability of 1 - epsilon is given for correct transition, and epsilon is given for incorrect transition.""" + def __init__(self, epsilon=1e-12): self.epsilon = epsilon @@ -96,6 +121,7 @@ def sample(self, next_state, action): class DetRewardModel(pomdp_py.RewardModel): """A DetRewardModel is a deterministic reward model (the most typical kind).""" + def reward_func(self, state, action, next_state): raise NotImplementedError @@ -106,6 +132,7 @@ def sample(self, state, action, next_state): def argmax(self, state, action, next_state): return self.sample(state, action, next_state) + class UniformPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, actions): self.actions = actions @@ -127,6 +154,7 @@ class TabularTransitionModel(pomdp_py.TransitionModel): given `weights` is complete, that is, it specifies the probability of all state-action-nextstate combinations """ + def __init__(self, weights): self.weights = weights self._states = set() @@ -137,13 +165,16 @@ def __init__(self, weights): def probability(self, next_state, state, action): if (state, action, next_state) in self.weights: return self.weights[(state, action, next_state)] - raise ValueError("The transition probability for"\ - f"{(state, action, next_state)} is not defined") + raise ValueError( + "The transition probability for" + f"{(state, action, next_state)} is not defined" + ) def sample(self, state, action): next_states = list(self._states) - probs = [self.probability(next_state, state, action) - for next_state in next_states] + probs = [ + self.probability(next_state, state, action) for next_state in next_states + ] return random.choices(next_states, weights=probs, k=1)[0] def get_all_states(self): @@ -155,6 +186,7 @@ class TabularObservationModel(pomdp_py.ObservationModel): (next_state, action, observation) to a probability. This model assumes that the given `weights` is complete. """ + def __init__(self, weights): self.weights = weights self._observations = set() @@ -167,14 +199,18 @@ def probability(self, observation, next_state, action): return self.weights[(next_state, action, observation)] elif (next_state, observation) in self.weights: return self.weights[(next_state, observation)] - raise ValueError("The observation probability for" - f"{(next_state, action, observation)} or {(next_state, observation)}" - "is not defined") + raise ValueError( + "The observation probability for" + f"{(next_state, action, observation)} or {(next_state, observation)}" + "is not defined" + ) def sample(self, next_state, action): observations = list(self._observations) - probs = [self.probability(observation, next_state, action) - for observation in observations] + probs = [ + self.probability(observation, next_state, action) + for observation in observations + ] return random.choices(observations, weights=probs, k=1)[0] def get_all_observations(self): @@ -186,6 +222,7 @@ class TabularRewardModel(pomdp_py.RewardModel): tuple (state, action), or (state, action, next_state) to a probability. This model assumes that the given `rewards` is complete. 
""" + def __init__(self, rewards): self.rewards = rewards @@ -200,6 +237,8 @@ def sample(self, state, action, *args): if (state, action, next_state) in self.rewards: return self.rewards[(state, action, next_state)] - raise ValueError("The reward is undefined for" - f"state={state}, action={action}" - f"next_state={args}") + raise ValueError( + "The reward is undefined for" + f"state={state}, action={action}" + f"next_state={args}" + ) diff --git a/pomdp_py/utils/typ.py b/pomdp_py/utils/typ.py index ef7e8bb4..6d37f6a4 100644 --- a/pomdp_py/utils/typ.py +++ b/pomdp_py/utils/typ.py @@ -3,28 +3,29 @@ strings for the purpose of displaying them. """ + # Colors on terminal https://stackoverflow.com/a/287944/2893053 class bcolors: - WHITE = '\033[97m' - CYAN = '\033[96m' - MAGENTA = '\033[95m' - BLUE = '\033[94m' - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BOLD = '\033[1m' - ENDC = '\033[0m' + WHITE = "\033[97m" + CYAN = "\033[96m" + MAGENTA = "\033[95m" + BLUE = "\033[94m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + RED = "\033[91m" + BOLD = "\033[1m" + ENDC = "\033[0m" @staticmethod def disable(): - bcolors.WHITE = '' - bcolors.CYAN = '' - bcolors.MAGENTA = '' - bcolors.BLUE = '' - bcolors.GREEN = '' - bcolors.YELLOW = '' - bcolors.RED = '' - bcolors.ENDC = '' + bcolors.WHITE = "" + bcolors.CYAN = "" + bcolors.MAGENTA = "" + bcolors.BLUE = "" + bcolors.GREEN = "" + bcolors.YELLOW = "" + bcolors.RED = "" + bcolors.ENDC = "" @staticmethod def s(color, content): @@ -32,51 +33,67 @@ def s(color, content): `color` is a constant in `bcolors` class.""" return color + content + bcolors.ENDC + def info(content): return bcolors.s(bcolors.BLUE, content) + def note(content): return bcolors.s(bcolors.YELLOW, content) + def error(content): return bcolors.s(bcolors.GREEN, content) + def warning(content): return bcolors.s(bcolors.YELLOW, content) + def success(content): return bcolors.s(bcolors.GREEN, content) + def bold(content): return bcolors.s(bcolors.BOLD, content) + def white(content): return bcolors.s(bcolors.WHITE, content) + def green(content): return bcolors.s(bcolors.GREEN, content) + def cyan(content): return bcolors.s(bcolors.CYAN, content) + def magenta(content): return bcolors.s(bcolors.MAGENTA, content) + def blue(content): return bcolors.s(bcolors.BLUE, content) + def green(content): return bcolors.s(bcolors.GREEN, content) + def yellow(content): return bcolors.s(bcolors.YELLOW, content) + def red(content): return bcolors.s(bcolors.RED, content) + def white(content): return bcolors.s(bcolors.WHITE, content) + colors = { "white", "green", diff --git a/pyproject.toml b/pyproject.toml index 317b1555..bd28e85c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,3 +37,43 @@ problems = [ [project.urls] Home = "https://github.com/h2r/pomdp-py" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. 
diff --git a/setup.py b/setup.py
index 5b01767d..6ed6be1d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,10 @@
 import os.path

-with open("README.rst", 'r') as f:
+with open("README.rst", "r") as f:
     long_description = f.read()

+
 # Build cython files as extensions
 def build_extensions(pkg_name, major_submodules):
     cwd = os.path.abspath(os.path.dirname(__file__))
@@ -22,18 +23,27 @@ def build_extensions(pkg_name, major_submodules):
     return extensions

-extensions = build_extensions("pomdp_py", ["framework",
-                                           "algorithms",
-                                           "utils",
-                                           "representations.distribution",
-                                           "representations.belief",
-                                           "problems.tiger.cythonize",
-                                           "problems.rocksample.cythonize"])
-
-setup(ext_modules=cythonize(extensions,
-                            build_dir="build",
-                            compiler_directives={'language_level' : "3"}),
-      package_data={"pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"],
-                    "pomdp_problems": ["*.pxd", "*.pyx", "*.so", "*.c"]},
-      zip_safe=False
+
+extensions = build_extensions(
+    "pomdp_py",
+    [
+        "framework",
+        "algorithms",
+        "utils",
+        "representations.distribution",
+        "representations.belief",
+        "problems.tiger.cythonize",
+        "problems.rocksample.cythonize",
+    ],
+)
+
+setup(
+    ext_modules=cythonize(
+        extensions, build_dir="build", compiler_directives={"language_level": "3"}
+    ),
+    package_data={
+        "pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"],
+        "pomdp_problems": ["*.pxd", "*.pyx", "*.so", "*.c"],
+    },
+    zip_safe=False,
 )
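The build entry point itself is unchanged by this reflow; compiling the Cython extensions in place still goes through the standard setuptools `build_ext` command, e.g.:

```python
import subprocess
import sys

# Equivalent to running `python setup.py build_ext --inplace` from the
# repository root: cythonize the listed submodules and place the built
# extension modules next to their sources.
subprocess.run([sys.executable, "setup.py", "build_ext", "--inplace"], check=True)
```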
diff --git a/tests/test_all.py b/tests/test_all.py
index 81f43e2e..e9b4796f 100644
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -10,6 +10,7 @@
 ABS_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(ABS_DIR)

+
 def main():
     parser = argparse.ArgumentParser(description="Running tests.")
     args = parser.parse_args()
@@ -17,12 +18,16 @@ def main():
     # load the test modules
     tests = []
     for fname in sorted(os.listdir(ABS_DIR)):
-        if fname != "test_all.py" and fname.startswith("test") and fname.endswith(".py"):
+        if (
+            fname != "test_all.py"
+            and fname.startswith("test")
+            and fname.endswith(".py")
+        ):
             test_module = importlib.import_module(fname.split(".py")[0])
             tests.append(test_module)

     for i, test_module in enumerate(tests):
-        print(typ.bold("[{}/{}] {}".format(i+1, len(tests), test_module.description)))
+        print(typ.bold("[{}/{}] {}".format(i + 1, len(tests), test_module.description)))

         old_stdout = sys.stdout
         try:
@@ -31,5 +36,6 @@ def main():
             sys.stdout = old_stdout
             print(" Error:", str(ex))

+
 if __name__ == "__main__":
     main()
diff --git a/tests/test_conversion_pomdp-solve.py b/tests/test_conversion_pomdp-solve.py
index 399432d7..4023b76b 100644
--- a/tests/test_conversion_pomdp-solve.py
+++ b/tests/test_conversion_pomdp-solve.py
@@ -10,13 +10,15 @@
 from pomdp_py.problems.tiger import make_tiger

-description="testing conversion to .pomdp file"
+description = "testing conversion to .pomdp file"
+

 def remove_files(pattern):
     file_list = glob.glob(pattern)
     for file_path in file_list:
         os.remove(file_path)

+
 def test_pomdp_file_conversion(pomdp_solve_path):
     print("[testing] test_pomdp_file_conversion")
     tiger = make_tiger()
@@ -27,8 +29,9 @@ def test_pomdp_file_conversion(pomdp_solve_path):
     assert os.path.exists(filename)

     print("[testing] Running pomdp-solve on generated %s" % filename)
-    proc = subprocess.Popen([pomdp_solve_path, "-pomdp", filename],
-                            stdout=subprocess.PIPE)
+    proc = subprocess.Popen(
+        [pomdp_solve_path, "-pomdp", filename], stdout=subprocess.PIPE
+    )
     solution_found = False
     for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
         if "Solution found" in line:
@@ -43,21 +46,26 @@
     remove_files("./*.pg")
     remove_files("./*.alpha")

+
 def _check_pomdp_solve():
     pomdp_solve_path = os.getenv("POMDP_SOLVE_PATH")
     if pomdp_solve_path is None or not os.path.exists(pomdp_solve_path):
-        raise FileNotFoundError("To run this test, download pomdp-solve from " "https://www.pomdp.org/code/. Then, follow the " "instructions on this web page to compile this software. " "Finally, set the environment variable POMDP_SOLVE_PATH " "to be the path to the pomdp-solve binary file " "generated after compilation, likely located at " "/path/to/pomdp-solve-/src/pomdp-solve ")
+        raise FileNotFoundError(
+            "To run this test, download pomdp-solve from "
+            "https://www.pomdp.org/code/. Then, follow the "
+            "instructions on this web page to compile this software. "
+            "Finally, set the environment variable POMDP_SOLVE_PATH "
+            "to be the path to the pomdp-solve binary file "
+            "generated after compilation, likely located at "
+            "/path/to/pomdp-solve-/src/pomdp-solve "
+        )
     return pomdp_solve_path

+
 def run():
     pomdp_solve_path = _check_pomdp_solve()
     test_pomdp_file_conversion(pomdp_solve_path)

+
 if __name__ == "__main__":
     run()
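This test aborts with a `FileNotFoundError` unless the solver binary is discoverable. A sketch of wiring it up before running the suite (the install location is a placeholder; the same pattern applies to `POMDPCONVERT_PATH` and `POMDPSOL_PATH` in the tests below):

```python
import os

# Placeholder location; point this at wherever your compiled
# pomdp-solve binary actually lives.
os.environ["POMDP_SOLVE_PATH"] = "/opt/pomdp-solve/src/pomdp-solve"
assert os.path.exists(os.environ["POMDP_SOLVE_PATH"]), "binary not found"
```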
" + "Finally, set the environment variable POMDPCONVERT_PATH " + "to be the path to the pomdpconvert binary file " + "generated after compilation, likely located at " + "/path/to/sarsop/src/pomdpconvert" + ) return pomdpconvert_path + def run(): pomdpconvert_path = _check_pomdpconvert() test_pomdpx_file_conversion(pomdpconvert_path) diff --git a/tests/test_hashing_pickling.py b/tests/test_hashing_pickling.py index 25b4859c..8c20824d 100644 --- a/tests/test_hashing_pickling.py +++ b/tests/test_hashing_pickling.py @@ -5,7 +5,8 @@ import subprocess import textwrap -description="testing hashing & pickling some objects" +description = "testing hashing & pickling some objects" + def test_hashing_pickling(): objstate1 = pomdp_py.ObjectState("apple", {"color": "red", "weight": 1.5}) @@ -25,7 +26,7 @@ def test_hashing_pickling(): assert hash(objstate3) == hash(objstate1) assert objstate3 != objstate1 - oos1 = pomdp_py.OOState({1:objstate1, 2:objstate2}) + oos1 = pomdp_py.OOState({1: objstate1, 2: objstate2}) assert hash(oos1.copy()) == hash(oos1) # save oos1 as a pickle file @@ -52,22 +53,25 @@ def test_hashing_pickling(): with open(temp_prog_file, "w") as f: f.write(prog) - proc = subprocess.Popen(["python", temp_prog_file], - stdout=subprocess.PIPE) + proc = subprocess.Popen(["python", temp_prog_file], stdout=subprocess.PIPE) passed = False try: for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"): if "Passed" in line: passed = True break - assert passed, "Something wrong - pickled object does not equal to original object" + assert ( + passed + ), "Something wrong - pickled object does not equal to original object" print("Pass.") finally: os.remove(temp_oos1_file) os.remove(temp_prog_file) + def run(): test_hashing_pickling() + if __name__ == "__main__": run() diff --git a/tests/test_particles.py b/tests/test_particles.py index 64e5ed45..5470176c 100644 --- a/tests/test_particles.py +++ b/tests/test_particles.py @@ -3,6 +3,7 @@ description = "testing particle representation" + def test_particles(): random_dist = {} total_prob = 0 @@ -12,7 +13,9 @@ def test_particles(): for v in random_dist: random_dist[v] /= total_prob - particles = pomdp_py.Particles.from_histogram(pomdp_py.Histogram(random_dist), num_particles=int(1e6)) + particles = pomdp_py.Particles.from_histogram( + pomdp_py.Histogram(random_dist), num_particles=int(1e6) + ) for v in random_dist: assert abs(particles[v] - random_dist[v]) <= 2e-3 @@ -41,7 +44,9 @@ def test_weighted_particles(): for v in random_dist: random_dist[v] /= total_prob - particles = pomdp_py.WeightedParticles.from_histogram(pomdp_py.Histogram(random_dist)) + particles = pomdp_py.WeightedParticles.from_histogram( + pomdp_py.Histogram(random_dist) + ) assert abs(sum(particles[v] for v, _ in particles) - 1.0) <= 1e-6 @@ -60,6 +65,7 @@ def test_weighted_particles(): assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe() + def run(): test_particles() test_weighted_particles() diff --git a/tests/test_sarsop.py b/tests/test_sarsop.py index 82d490ca..cd48ed78 100644 --- a/tests/test_sarsop.py +++ b/tests/test_sarsop.py @@ -7,7 +7,8 @@ import os import io -description="testing sarsop" +description = "testing sarsop" + def test_sarsop(pomdpsol_path): print("[testing] test_sarsop") @@ -15,13 +16,18 @@ def test_sarsop(pomdpsol_path): # Building a policy graph print("[testing] solving the tiger problem...") - policy = sarsop(tiger.agent, pomdpsol_path, discount_factor=0.95, - timeout=10, memory=20, precision=0.000001, - remove_generated_files=True, - 
logfile="test_sarsop.log") + policy = sarsop( + tiger.agent, + pomdpsol_path, + discount_factor=0.95, + timeout=10, + memory=20, + precision=0.000001, + remove_generated_files=True, + logfile="test_sarsop.log", + ) - assert str(policy.plan(tiger.agent)) == "listen",\ - "Bad solution. Test failed." + assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed." assert os.path.exists("test_sarsop.log") os.remove("test_sarsop.log") @@ -34,14 +40,20 @@ def test_sarsop(pomdpsol_path): action = policy.plan(tiger.agent) observation = tiger.agent.observation_model.sample(true_state, action) reward = tiger.env.reward_model.sample(true_state, action, None) - print("[testing] running computed policy graph"\ - "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward)) + print( + "[testing] running computed policy graph" + "(step=%d, action=%s, observation=%s, reward=%d)" + % (step, action, observation, reward) + ) # belief update - new_belief = pomdp_py.update_histogram_belief(tiger.agent.cur_belief, - action, observation, - tiger.agent.observation_model, - tiger.agent.transition_model) + new_belief = pomdp_py.update_histogram_belief( + tiger.agent.cur_belief, + action, + observation, + tiger.agent.observation_model, + tiger.agent.transition_model, + ) tiger.agent.set_belief(new_belief) assert reward == -1 or reward == 10, "Reward is negative. Failed." @@ -50,18 +62,22 @@ def test_sarsop(pomdpsol_path): assert got_high_reward, "Should have gotten high reward. Failed." print("Pass.") + def _check_pomdpsol(): pomdpsol_path = os.getenv("POMDPSOL_PATH") if pomdpsol_path is None or not os.path.exists(pomdpsol_path): - raise FileNotFoundError("To run this test, download sarsop from" - "https://github.com/AdaCompNUS/sarsop. Then, follow the " - "instructions on this web page to compile this software. " - "Finally, set the environment variable POMDPSOL_PATH " - "to be the path to the pomdpsol binary file " - "generated after compilation, likely located at " - "/path/to/sarsop/src/pomdpsol") + raise FileNotFoundError( + "To run this test, download sarsop from" + "https://github.com/AdaCompNUS/sarsop. Then, follow the " + "instructions on this web page to compile this software. 
" + "Finally, set the environment variable POMDPSOL_PATH " + "to be the path to the pomdpsol binary file " + "generated after compilation, likely located at " + "/path/to/sarsop/src/pomdpsol" + ) return pomdpsol_path + def run(): pomdpsol_path = _check_pomdpsol() test_sarsop(pomdpsol_path) diff --git a/tests/test_tree_debugger.py b/tests/test_tree_debugger.py index 7f9ebab7..82277b17 100644 --- a/tests/test_tree_debugger.py +++ b/tests/test_tree_debugger.py @@ -3,13 +3,18 @@ import pomdp_py from pomdp_py.utils.debugging import TreeDebugger -description="testing pomdp_py.utils.TreeDebugger" +description = "testing pomdp_py.utils.TreeDebugger" + def test_tree_debugger_tiger(debug_tree=False): tiger_problem = TigerProblem.create("tiger-left", 0.5, 0.15) - pouct = pomdp_py.POUCT(max_depth=4, discount_factor=0.95, - num_sims=4096, exploration_const=200, - rollout_policy=tiger_problem.agent.policy_model) + pouct = pomdp_py.POUCT( + max_depth=4, + discount_factor=0.95, + num_sims=4096, + exploration_const=200, + rollout_policy=tiger_problem.agent.policy_model, + ) pouct.plan(tiger_problem.agent) dd = TreeDebugger(tiger_problem.agent.tree) @@ -33,6 +38,7 @@ def test_tree_debugger_tiger(debug_tree=False): test_planner(tiger_problem, pouct, nsteps=3, debug_tree=debug_tree) + def run(verbose=False, debug_tree=False): test_tree_debugger_tiger(debug_tree=debug_tree) diff --git a/tests/test_vi_pruning.py b/tests/test_vi_pruning.py index 02810a90..8e7e7a76 100644 --- a/tests/test_vi_pruning.py +++ b/tests/test_vi_pruning.py @@ -8,7 +8,8 @@ import os import io -description="testing vi_pruning (pomdp-solve)" +description = "testing vi_pruning (pomdp-solve)" + def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): print("[testing] test_vi_pruning") @@ -16,13 +17,16 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): # Building a policy graph print("[testing] solving the tiger problem...") - policy = vi_pruning(tiger.agent, pomdp_solve_path, discount_factor=0.95, - options=["-horizon", "100"], - remove_generated_files=True, - return_policy_graph=return_policy_graph) + policy = vi_pruning( + tiger.agent, + pomdp_solve_path, + discount_factor=0.95, + options=["-horizon", "100"], + remove_generated_files=True, + return_policy_graph=return_policy_graph, + ) - assert str(policy.plan(tiger.agent)) == "listen",\ - "Bad solution. Test failed." + assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed." # Plan with the graph for several steps. So we should get high rewards # eventually in the tiger domain. @@ -32,18 +36,24 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): action = policy.plan(tiger.agent) observation = tiger.agent.observation_model.sample(true_state, action) reward = tiger.env.reward_model.sample(true_state, action, None) - print("[testing] simulating computed policy graph"\ - "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward)) + print( + "[testing] simulating computed policy graph" + "(step=%d, action=%s, observation=%s, reward=%d)" + % (step, action, observation, reward) + ) # No belief update needed. Just update the policy graph if return_policy_graph: # We use policy graph. No belief update is needed. Just update the policy. 
diff --git a/tests/test_vi_pruning.py b/tests/test_vi_pruning.py
index 02810a90..8e7e7a76 100644
--- a/tests/test_vi_pruning.py
+++ b/tests/test_vi_pruning.py
@@ -8,7 +8,8 @@
 import os
 import io

-description="testing vi_pruning (pomdp-solve)"
+description = "testing vi_pruning (pomdp-solve)"
+

 def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
     print("[testing] test_vi_pruning")
@@ -16,13 +17,16 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):

     # Building a policy graph
     print("[testing] solving the tiger problem...")
-    policy = vi_pruning(tiger.agent, pomdp_solve_path, discount_factor=0.95,
-                        options=["-horizon", "100"],
-                        remove_generated_files=True,
-                        return_policy_graph=return_policy_graph)
+    policy = vi_pruning(
+        tiger.agent,
+        pomdp_solve_path,
+        discount_factor=0.95,
+        options=["-horizon", "100"],
+        remove_generated_files=True,
+        return_policy_graph=return_policy_graph,
+    )

-    assert str(policy.plan(tiger.agent)) == "listen",\
-        "Bad solution. Test failed."
+    assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed."

     # Plan with the graph for several steps. So we should get high rewards
     # eventually in the tiger domain.
@@ -32,18 +36,24 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
         action = policy.plan(tiger.agent)
         observation = tiger.agent.observation_model.sample(true_state, action)
         reward = tiger.env.reward_model.sample(true_state, action, None)
-        print("[testing] simulating computed policy graph"\
-              "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward))
+        print(
+            "[testing] simulating computed policy graph "
+            "(step=%d, action=%s, observation=%s, reward=%d)"
+            % (step, action, observation, reward)
+        )

         # No belief update needed. Just update the policy graph
         if return_policy_graph:
             # We use policy graph. No belief update is needed. Just update the policy.
             policy.update(tiger.agent, action, observation)
         else:
             # belief update is needed
-            new_belief = pomdp_py.update_histogram_belief(tiger.agent.cur_belief,
-                                                          action, observation,
-                                                          tiger.agent.observation_model,
-                                                          tiger.agent.transition_model)
+            new_belief = pomdp_py.update_histogram_belief(
+                tiger.agent.cur_belief,
+                action,
+                observation,
+                tiger.agent.observation_model,
+                tiger.agent.transition_model,
+            )
             tiger.agent.set_belief(new_belief)

         assert reward == -1 or reward == 10, "Reward is negative. Failed."
@@ -52,21 +62,26 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
     assert got_high_reward, "Should have gotten high reward. Failed."
     print("Pass.")

+
 def _check_pomdp_solve():
     pomdp_solve_path = os.getenv("POMDP_SOLVE_PATH")
     if pomdp_solve_path is None or not os.path.exists(pomdp_solve_path):
-        raise FileNotFoundError("To run this test, download pomdp-solve from " "https://www.pomdp.org/code/. Then, follow the " "instructions on this web page to compile this software. " "Finally, set the environment variable POMDP_SOLVE_PATH " "to be the path to the pomdp-solve binary file " "generated after compilation, likely located at " "/path/to/pomdp-solve-/src/pomdp-solve ")
+        raise FileNotFoundError(
+            "To run this test, download pomdp-solve from "
+            "https://www.pomdp.org/code/. Then, follow the "
+            "instructions on this web page to compile this software. "
+            "Finally, set the environment variable POMDP_SOLVE_PATH "
+            "to be the path to the pomdp-solve binary file "
+            "generated after compilation, likely located at "
+            "/path/to/pomdp-solve-/src/pomdp-solve "
+        )
     return pomdp_solve_path

+
 def run():
     pomdp_solve_path = _check_pomdp_solve()
     test_vi_pruning(pomdp_solve_path, return_policy_graph=False)

+
 if __name__ == "__main__":
     run()